Skip to content

Commit

Permalink
add cell type annotation functions
Browse files Browse the repository at this point in the history
  • Loading branch information
stemangiola committed Feb 8, 2024
1 parent f0f7bdc commit 6aac48e
Show file tree
Hide file tree
Showing 8 changed files with 1,168 additions and 18 deletions.
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(alive_identification)
export(annotation_consensus)
export(annotation_label_transfer)
export(cell_cycle_scoring)
export(doublet_identification)
Expand Down Expand Up @@ -66,6 +67,8 @@ importFrom(celldex,BlueprintEncodeData)
importFrom(celldex,MonacoImmuneData)
importFrom(crew,crew_controller_local)
importFrom(digest,digest)
importFrom(dplyr,"%>%")
importFrom(dplyr,case_when)
importFrom(dplyr,count)
importFrom(dplyr,filter)
importFrom(dplyr,if_else)
Expand Down Expand Up @@ -94,8 +97,13 @@ importFrom(scater,isOutlier)
importFrom(scuttle,logNormCounts)
importFrom(scuttle,perCellQCMetrics)
importFrom(stats,as.formula)
importFrom(stringr,str_detect)
importFrom(stringr,str_remove)
importFrom(stringr,str_remove_all)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_subset)
importFrom(stringr,str_trim)
importFrom(stringr,str_which)
importFrom(targets,tar_config_get)
importFrom(targets,tar_option_set)
Expand Down
18 changes: 9 additions & 9 deletions R/execute_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,21 +168,21 @@ run_targets_pipeline <- function(
# tarchetypes::tar_files(name= reference_track,
# read_reference_file,
# deployment = "main"),
tar_target(filter_empty_droplets, filtered_file, deployment = "main"),
tar_target(tissue, tissue_file, deployment = "main"),
tar_target(sample_column, sample_column_file, deployment = "main"),
tar_target(reference_label_coarse, reference_label_coarse_id(tissue), deployment = "main"),
tar_target(reference_label_fine, reference_label_fine_id(tissue), deployment = "main"),
tar_target(do_filter_empty_droplets, filtered_file, deployment = "main"),
tar_target(tissue_type, tissue_file, deployment = "main"),
tar_target(sample_column_name, sample_column_file, deployment = "main"),
tar_target(reference_label_coarse, reference_label_coarse_id(tissue_type), deployment = "main"),
tar_target(reference_label_fine, reference_label_fine_id(tissue_type), deployment = "main"),
# Reading input files
tar_target(input_read, readRDS(read_file),
pattern = map(read_file),
iteration = "list", deployment = "main"),

tar_target(reference_read, reference_file, deployment = "main"),
tar_target(reference_read, switch((!is.null(reference_file)) + 1, NULL, readRDS(reference_file)), deployment = "main"),

# Identifying empty droplets
tar_target(empty_droplets_tbl,
empty_droplet_id(input_read, filter_empty_droplets),
empty_droplet_id(input_read, do_filter_empty_droplets),
pattern = map(input_read),
iteration = "list"),

Expand Down Expand Up @@ -235,7 +235,7 @@ run_targets_pipeline <- function(
iteration = "list"),

# Pre-processing output
tar_target(preprocessing_output_S, preprocessing_output(tissue,
tar_target(preprocessing_output_S, preprocessing_output(tissue_type,
non_batch_variation_removal_S,
alive_identification_tbl,
cell_cycle_score_tbl,
Expand All @@ -251,7 +251,7 @@ run_targets_pipeline <- function(
# pseudobulk preprocessing
tar_target(pseudobulk_preprocessing_SE, pseudobulk_preprocessing(reference_label_fine,
preprocessing_output_S,
!!sample_column)
sample_column_name)

)))

Expand Down
97 changes: 96 additions & 1 deletion R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ annotation_label_transfer <- function(input_read_RNA_assay,


# Subset
RNA_assay = input_read_RNA_assay[[assay]][rownames(input_read_RNA_assay[[assay]]) %in% rownames(reference_azimuth[["SCT"]]),]
RNA_assay = input_read_RNA_assay[rownames(input_read_RNA_assay[[assay]]) %in% rownames(reference_azimuth[["SCT"]]),][[assay]]
#RNA_assay <- input_read_RNA_assay@assays$RNA[["counts"]][rownames(input_read_RNA_assay@assays$RNA[["counts"]])%in% rownames(reference_azimuth[["SCT"]]),]
#ADT_assay = input_read_RNA_assay[["ADT"]][rownames(input_read_RNA_assay[["ADT"]]) %in% rownames(reference_azimuth[["ADT"]]),]
input_read_RNA_assay <- CreateSeuratObject( counts = RNA_assay)
Expand Down Expand Up @@ -1272,4 +1272,99 @@ map_split_sce_by_gene = function(sce_df, .col, how_many_chunks_base = 10, max_ce
mutate(sce_md5 = map_chr(!!.col, digest))
}

#' Harmonize cell type annotations based on consensus
#'
#' This function harmonizes cell type annotations by matching them with a reference annotation
#' and applying specific rules for non-immune cell types.
#'
#' @param single_cell_data A data frame containing single-cell data with cell type annotations.
#' @param .sample_column The column name specifying sample information.
#' @param .cell_type The column name for the cell type annotations.
#' @param .azimuth The column name for Azimuth annotations.
#' @param .blueprint The column name for Blueprint annotations.
#' @param .monaco The column name for Monaco annotations.
#'
#' @return A data frame with harmonized cell type annotations.
#'
#' @export
annotation_consensus = function(single_cell_data, .sample_column, .cell_type, .azimuth, .blueprint, .monaco){

.sample_column = enquo(.sample_column)
.azimuth = enquo(.azimuth)
.blueprint = enquo(.blueprint)
.monaco = enquo(.monaco)
.cell_type = enquo(.cell_type)

# reference_annotation =
# CuratedAtlasQueryR::get_metadata() |>
# filter(cell_type_harmonised!="immune_unclassified" | is.na(cell_type_harmonised)) |>
# select(cell_type,
# cell_type_harmonised,
# cell_annotation_azimuth_l2,
# cell_annotation_blueprint_singler,
# cell_annotation_monaco_singler,
# confidence_class
# ) |>
# as_tibble() |>
# mutate(cell_type_clean = cell_type |> clean_cell_types()) |>
# clean_cell_types_deeper() |>
# select(-cell_type) |>
#
# count(cell_type_harmonised, cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler, confidence_class, cell_type_clean) |>
# with_groups(c(cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler, cell_type_clean), ~ .x |> arrange(desc(n)) |> slice(1) )
#
# reference_annotation |> saveRDS("reference_annotation_16_jan_2024.rds")

reference_annotation = readRDS("reference_annotation_16_jan_2024.rds")

annotation=
single_cell_data |>
rename(
.sample := !!.sample_column,
cell_type := !!.cell_type,
cell_annotation_azimuth_l2 := !!.azimuth,
cell_annotation_blueprint_singler := !!.blueprint,
cell_annotation_monaco_singler := !!.monaco
) |>
select(.cell, .sample, cell_type, cell_annotation_azimuth_l2,cell_annotation_blueprint_singler, cell_annotation_monaco_singler) |>
mutate(across(c(cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler), tolower )) |>
mutate(across(c(cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler), clean_cell_types )) |>

is_strong_evidence(cell_annotation_azimuth_l2, cell_annotation_blueprint_singler) |>

# Clen cell types
mutate(cell_type_clean = cell_type |> clean_cell_types()) |>
left_join(read_csv("~/PostDoc/CuratedAtlasQueryR/dev/metadata_cell_type.csv"), by = "cell_type") |>
clean_cell_types_deeper() |>

# Reference annotation link
left_join(reference_annotation )

annotation_connie_non_immune =
annotation |>
filter(cell_type_harmonised |> is.na()) |>

harmonise_names_non_immune() |>

# Fix some gaps in the original code
mutate(cell_type_harmonised = case_when(
cell_type |> tolower() |> str_detect("endothelial") ~ "endothelial_cell",
cell_type |> tolower() |> str_detect("enodothelial") ~ "endothelial_cell",
cell_type |> tolower() |> str_detect("epithelial") ~ "epithelial_cell",
cell_type |> tolower() |> str_detect("fibroblast") ~ "fibroblast",
TRUE ~ cell_type
)) |>

mutate(confidence_class = 1)

single_cell_data |>
left_join(
annotation |>
filter(!cell_type_harmonised |> is.na()) |>
bind_rows(annotation_connie_non_immune) |>
select(.cell, .sample, cell_type_harmonised, confidence_class),
by = join_by(.cell, !!.sample_column == .sample)
)

}

9 changes: 6 additions & 3 deletions R/targets_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ map2_test_differential_abundance_hpc = function(
computing_resources = crew_controller_local(workers = 1) ,
cpus_per_task = 1,
debug_job_id = NULL,
append = FALSE
append = FALSE,
...
){

.abundance = enquo(.abundance)
Expand Down Expand Up @@ -259,7 +260,8 @@ test_differential_abundance_hpc = function(
store = tempfile(tmpdir = "."),
computing_resources = crew_controller_local(workers = 1) ,
debug_job_id = NULL,
append = FALSE
append = FALSE,
...
){

# Create input dataframe
Expand All @@ -275,7 +277,8 @@ test_differential_abundance_hpc = function(
store = store,
computing_resources = computing_resources,
debug_job_id = debug_job_id,
append = append
append = append,
...
)) |>
pull(data) |>
extract2(1)
Expand Down
Loading

0 comments on commit 6aac48e

Please sign in to comment.