-
Notifications
You must be signed in to change notification settings - Fork 2
/
MySeuratPipeline_v2.R
118 lines (94 loc) · 4.19 KB
/
MySeuratPipeline_v2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
require(methods)
script_dir = "/cluster/home/tandrews/scripts/LiverMap2.0"
auto_anno_dir = "/cluster/projects/macparland/TA/AutoAnnotation"
source(paste(script_dir, "Setup_autoannotation.R", sep="/"))
i <- as.numeric(as.character(commandArgs(trailingOnly=TRUE)))
#i = 1
Params <- read.table(paste(script_dir, "LiverMap_SampleProcessingParams.csv", sep="/"), sep=",", header=T)
mt_filter <- Params[i,"MTfilter"]
ng_filter <- Params[i,"nGenefilter"]
nc_filter <- Params[i,"nCellfilter"]
nkNN <- Params[i, "kNN"]
npcs <- Params[i, "nPCs"]
nhvg <- Params[i, "nHGV"]
this_seed <- Params[i, "Seed"]
max_cells <- 20000
name <- as.character(Params[i, "Name"])
folder <- as.character(Params[i, "Directory_UHN"])
set.seed(this_seed)
#res <- 5
res <- 3
require(dplyr)
require(Seurat)
require(Matrix)
# Read the data
mydata <- Read10X(data.dir = paste(folder, "filtered_gene_bc_matrices/GRCh38", sep="/"))
print(paste("Stats out:", dim(mydata), "input dimensions of", name))
# Enforce a maximum number of cells captured based on expected number.
mydata <- mydata[,Matrix::colSums(mydata) > ng_filter]
if (ncol(mydata) > 20000) {
ng_detect <- Matrix::colSums(mydata);
q_thresh <- quantile(ng_detect, 1-max_cells/length(ng_detect))
mydata <- mydata[,ng_detect > q_thresh]
}
myseur <- CreateSeuratObject(counts = mydata, project = name, min.cells = nc_filter, min.features = ng_filter) ### <- ERROR! gene & cell filter are backwards. - Fixed on 20/04/2020
# Mitochondrial filter
myseur[["percent.mt"]] <- PercentageFeatureSet(myseur, pattern = "^MT-")
myseur <- subset(myseur, subset = nFeature_RNA > ng_filter & percent.mt < mt_filter)
print(paste("Stats out:", dim(myseur), "seurat dimensions of", name))
# Add metadata
[email protected]$cell_barcode <- colnames(myseur)
[email protected]$donor <- rep(name, ncol(myseur));
[email protected]$cell_ID <- paste([email protected]$donor, [email protected]$cell_barcode, sep="_");
# sctransform normalization
#require("sctransform")
#norm <- sctransform::vst(Matrix(myseur@assays$RNA@counts), res_clip_range=c(-Inf, Inf), method="nb_fast");
#myseur@assays$RNA@data <- norm$y;
myseur <- NormalizeData(myseur);
# Scale
myseur <- ScaleData(myseur);
# HVG genes (n=2000)
myseur <- FindVariableFeatures(myseur, selection.method = "vst", nfeatures = 2000)
# PCA
myseur <- RunPCA(myseur, features = VariableFeatures(object = myseur))
ElbowPlot(myseur)
# Clustering
myseur <- FindNeighbors(myseur, dims = 1:npcs)
myseur <- FindClusters(myseur, resolution = res, k.param=nkNN)
# Visualization with TSNE & UMAP
myseur <- RunTSNE(myseur, dims = 1:npcs)
myseur <- RunUMAP(myseur, dims = 1:npcs, parallel=FALSE)
png(paste(name, "_default_tsne.png", sep=""), width=6, height=6, units="in", res=100)
DimPlot(myseur, reduction = "tsne")
dev.off()
png(paste(name, "_default_umap.png", sep=""), width=6, height=6, units="in", res=100)
DimPlot(myseur, reduction = "umap")
dev.off()
print(paste("Stats out:", length(unique([email protected]$seurat_clusters)), "seurat clusters in", name))
#Cell-cycle
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
myseur <- CellCycleScoring(myseur, s.features = s.genes, g2m.features=g2m.genes, set.ident=TRUE)
# AutoAnnotation with scmap
myseur <- run_scmap_seurat(myseur, scmap_ref=map1_ref);
simplify_annotations <- function(annotations) {
simplified <- as.character(annotations)
simplified[simplified %in% c(
"AntibodysecretingBcells",
"MatureBcells")] <- "Bcells"
simplified[simplified %in% c(
"CD3abTcells", "gdTcells1", "gdTcells2")] <- "Tcells"
simplified[simplified %in% c(
"PericentralHep", "UnidentifiedHep", "PeriportalHep",
"interzonalHep")] <- "Hepatocyte"
return(simplified);
}
general_labs <- simplify_annotations([email protected]$scmap_cell_anno)
general_labs2 <- simplify_annotations([email protected]$scmap_cluster_anno)
general_labs[general_labs != general_labs2] <- "ambiguous"
general_labs[general_labs == "unassigned"] <- "ambiguous"
[email protected]$general_labs <- general_labs;
inconsistent <- [email protected]$scmap_cell_anno != [email protected]$scmap_cluster_anno;
[email protected]$consistent_labs <- [email protected]$scmap_cell_anno;
[email protected]$consistent_labs[inconsistent] <- general_labs[inconsistent]
saveRDS(myseur, paste(name,"Anno_SeurObj.rds", sep="_"));