diff --git a/NAMESPACE b/NAMESPACE index 324cfe6..1aafe74 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -125,7 +125,9 @@ importFrom(Seurat,ScaleData) importFrom(Seurat,VariableFeatures) importFrom(Seurat,as.Seurat) importFrom(Seurat,as.SingleCellExperiment) +importFrom(SingleCellExperiment,"altExp<-") importFrom(SingleCellExperiment,SingleCellExperiment) +importFrom(SingleCellExperiment,altExp) importFrom(SingleR,SingleR) importFrom(SummarizedExperiment,"assay<-") importFrom(SummarizedExperiment,"colData<-") diff --git a/R/functions.R b/R/functions.R index 9354a4e..49ef35d 100644 --- a/R/functions.R +++ b/R/functions.R @@ -696,7 +696,8 @@ non_batch_variation_removal <- function(input_read_RNA_assay, alive_identification_tbl, cell_cycle_score_tbl, assay = NULL, - factors_to_regress = NULL){ + factors_to_regress = NULL, + external_path){ #Fix GChecks empty_droplet = NULL .cell <- NULL @@ -705,7 +706,7 @@ non_batch_variation_removal <- function(input_read_RNA_assay, G2M.Score = NULL # Your code for non_batch_variation_removal function here - + class_input = input_read_RNA_assay |> class() # Get assay if(is.null(assay)) assay = input_read_RNA_assay@assays |> names() |> extract2(1) @@ -722,7 +723,7 @@ non_batch_variation_removal <- function(input_read_RNA_assay, new.assay.name = assay) } - counts = + input_read_RNA_assay = input_read_RNA_assay |> left_join(empty_droplets_tbl, by = ".cell") |> filter(!empty_droplet) |> @@ -734,7 +735,7 @@ non_batch_variation_removal <- function(input_read_RNA_assay, ) if(!is.null(cell_cycle_score_tbl)) - counts = counts |> + input_read_RNA_assay = input_read_RNA_assay |> left_join( cell_cycle_score_tbl |> @@ -750,40 +751,57 @@ non_batch_variation_removal <- function(input_read_RNA_assay, # VariableFeatures(counts) = variable_features # Normalise RNA - normalized_rna <- Seurat::SCTransform( - counts, + normalized_rna <- + Seurat::SCTransform( + input_read_RNA_assay, assay=assay, return.only.var.genes=FALSE, residual.features = NULL, vars.to.regress = factors_to_regress, vst.flavor = "v2", - scale_factor=2186 - ) - - my_assays = "SCT" + scale_factor=2186, + conserve.memory=T, + min_cells=0, + ) |> + GetAssayData(assay="SCT") - if (inherits(input_read_RNA_assay, "SingleCellExperiment")) { - normalized_rna <- normalized_rna |> as.SingleCellExperiment(assay = assay) - } else if (inherits(input_read_RNA_assay, "Seurat")) { - normalized_rna - } - - # Normalise antibodies - if ( "ADT" %in% names(normalized_rna@assays)) { - normalized_data <- normalized_rna %>% - NormalizeData(normalization.method = 'CLR', margin = 2, assay="ADT") %>% - select(-subsets_Ribo_percent, -subsets_Mito_percent, -G2M.Score) + if (class_input == "SingleCellExperiment") { + dir.create(external_path, showWarnings = FALSE, recursive = TRUE) + + + # Write the slice to the output HDF5 file + normalized_rna |> + HDF5Array::writeHDF5Array( + filepath = glue("{external_path}/{digest(normalized_rna)}"), + name = "SCT", + as.sparse = TRUE + ) - my_assays = my_assays |> c("CLR") + } else if (class_input == "Seurat") { + + normalized_rna - } else { - normalized_data <- normalized_rna %>% - # Drop alive columns - select(-subsets_Ribo_percent, -subsets_Mito_percent, -G2M.Score) } + + + # # Normalise antibodies + # if ( "ADT" %in% names(normalized_rna@assays)) { + # normalized_data <- normalized_rna %>% + # NormalizeData(normalization.method = 'CLR', margin = 2, assay="ADT") %>% + # select(-subsets_Ribo_percent, -subsets_Mito_percent, -G2M.Score) + # + # my_assays = my_assays |> c("CLR") + # + # } else { + # normalized_data <- normalized_rna %>% + # # Drop alive columns + # select(-subsets_Ribo_percent, -subsets_Mito_percent, -G2M.Score) + # } - normalized_data[[my_assays]] + + + } @@ -807,10 +825,12 @@ non_batch_variation_removal <- function(input_read_RNA_assay, #' @importFrom dplyr filter #' @importFrom dplyr select #' @import SeuratObject -#' @importFrom SummarizedExperiment left_join +#' @importFrom dplyr left_join #' @import tidySingleCellExperiment #' @import tidyseurat #' @importFrom magrittr not +#' @importFrom SingleCellExperiment altExp +#' @importFrom SingleCellExperiment altExp<- #' @export preprocessing_output <- function(input_read_RNA_assay, empty_droplets_tbl, @@ -840,11 +860,13 @@ preprocessing_output <- function(input_read_RNA_assay, if(input_read_RNA_assay |> is("Seurat")) input_read_RNA_assay[["SCT"]] = non_batch_variation_removal_S else if(input_read_RNA_assay |> is("SingleCellExperiment")){ - message("HPCell says: in order to attach SCT assay to the SingleCellExperiment, the non overlapping features (lowly abundant in the majority of cells) have been dropped") + message("HPCell says: in order to attach SCT assay to the SingleCellExperiment, SCT was added to external experiments slot") + + #input_read_RNA_assay = input_read_RNA_assay[rownames(non_batch_variation_removal_S), ] - input_read_RNA_assay = input_read_RNA_assay[rownames(non_batch_variation_removal_S), ] + # altExp(input_read_RNA_assay) = SingleCellExperiment(assay = list(SCT = non_batch_variation_removal_S)) - assay(input_read_RNA_assay, "SCT") <- GetAssayData(non_batch_variation_removal_S) + assay(input_read_RNA_assay, "SCT") <- non_batch_variation_removal_S } } diff --git a/R/modules_grammar_hpc.R b/R/modules_grammar_hpc.R index 8298256..3c7aa15 100644 --- a/R/modules_grammar_hpc.R +++ b/R/modules_grammar_hpc.R @@ -589,7 +589,7 @@ normalise_abundance_seurat_SCT.HPCell = function(input_hpc, factors_to_regress = # Optionally, you can evaluate the arguments if they are expressions args_list <- lapply(args_list, eval, envir = parent.frame()) - args_list$factory = function(tiers, target_input, target_output){ + args_list$factory = function(tiers, target_input, target_output, external_path){ list( tar_target_raw("factors_to_regress", readRDS("factors_to_regress.rds") |> quote(), deployment = "main"), @@ -601,9 +601,10 @@ normalise_abundance_seurat_SCT.HPCell = function(input_hpc, factors_to_regress = empty_droplets_tbl, alive_identification_tbl, cell_cycle_score_tbl, - factors_to_regress = factors_to_regress + factors_to_regress = factors_to_regress, + external_path = e ) |> - substitute(env = list(i=as.symbol(target_input))), + substitute(env = list(i=as.symbol(target_input), e = external_path)), tiers, other_arguments_to_tier = c(target_input, "empty_droplets_tbl", "alive_identification_tbl", "cell_cycle_score_tbl"), other_arguments_to_map = c(target_input, "empty_droplets_tbl", "alive_identification_tbl", "cell_cycle_score_tbl") @@ -633,6 +634,7 @@ normalise_abundance_seurat_SCT.HPCell = function(input_hpc, factors_to_regress = tiers = input_hpc$initialisation$tier |> get_positions() , target_input = target_input, target_output = target_output, + external_path = glue("{input_hpc$initialisation$store}/external"), script = glue("{input_hpc$initialisation$store}.R") ) } @@ -1031,7 +1033,7 @@ evaluate_hpc.HPCell = function(input_hpc) { if(input_hpc$last_call |> is.null() |> not()) return( tar_meta(store = glue("{input_hpc$initialisation$store}")) |> - filter(name |> str_detect("preprocessing_output_S_?.*$")) |> + filter(name |> str_detect("preprocessing_output_S_?.*$"), type=="pattern") |> pull(name) |> map(tar_read_raw) |> unlist() |> @@ -1041,7 +1043,7 @@ evaluate_hpc.HPCell = function(input_hpc) { else return( tar_meta(store = glue("{input_hpc$initialisation$store}")) |> - filter(name |> str_detect("preprocessing_output_S_?.*$")) + filter(name |> str_detect("preprocessing_output_S_?.*$"), type=="pattern") ) } diff --git a/man/non_batch_variation_removal.Rd b/man/non_batch_variation_removal.Rd index 4e2383b..c747c3b 100644 --- a/man/non_batch_variation_removal.Rd +++ b/man/non_batch_variation_removal.Rd @@ -10,7 +10,8 @@ non_batch_variation_removal( alive_identification_tbl, cell_cycle_score_tbl, assay = NULL, - factors_to_regress = NULL + factors_to_regress = NULL, + external_path ) } \arguments{ diff --git a/tests/testthat/test_single_functions.R b/tests/testthat/test_single_functions.R index f7d8d68..4b2c309 100644 --- a/tests/testthat/test_single_functions.R +++ b/tests/testthat/test_single_functions.R @@ -489,7 +489,7 @@ library(crew.cluster) data_container_type = "sce_hdf5", # tier = c("tier_1", "tier_2"), # - # debug_step = "pseudobulk_table_dispersion_gene", + # debug_step = "non_batch_variation_removal_S_1", # Default resourced