-
Notifications
You must be signed in to change notification settings - Fork 58
/
tp53_analysis.sh
executable file
·92 lines (79 loc) · 2.72 KB
/
tp53_analysis.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash
# Pipeline to reproduce pancancer pathways TP53 machine learning classifier
#
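# Usage: bash tp53_analysis.sh
#        (run from the directory that contains scripts/ -- every path used
#        below is relative to the current working directory)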
#
# Output: Will train a pan cancer model to detect TP53 aberration. Will also
# train a unique classifier within each specific cancer type
# Strict mode: stop at the first failing command, on any use of an unset
# variable, and on a failure in any stage of a pipeline. Later steps are
# pointed at the folder the earlier steps were told to use, so carrying on
# after a failure would run them against missing or stale inputs.
set -euo pipefail

# Set Constants
# Comma-separated cancer-type codes handed to --diseases.
tp53_diseases='BLCA,BRCA,CESC,COAD,ESCA,GBM,HNSC,KICH,LGG,LIHC,LUAD,LUSC,'\
'PAAD,PRAD,READ,SARC,SKCM,STAD,UCEC'
# Comma-separated values handed to --alphas.
alphas='0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7'
# Comma-separated values handed to --l1_ratios.
l1_mixing='0.1,0.125,0.15,0.2,0.25,0.3,0.35'
# Folder shared by every step (--alt_folder, --classifier, --scores, ...).
tp53_dir='classifiers/TP53'
# Nothing below is meant to reassign these; make accidental writes an error.
readonly tp53_diseases alphas l1_mixing tp53_dir
###############
# Step 1. Pan Cancer TP53 classification
###############
# Runs scripts/pancancer_classifier.py for the TP53 gene over the cancer
# types in tp53_diseases, with tp53_dir given as --alt_folder and the
# alphas / l1_mixing lists given as --alphas / --l1_ratios.
# Every expansion is double-quoted so each value reaches the script as
# exactly one argument, even if a constant is later edited to contain
# whitespace or glob characters.
python scripts/pancancer_classifier.py \
  --genes 'TP53' \
  --diseases "$tp53_diseases" \
  --drop \
  --copy_number \
  --remove_hyper \
  --alt_folder "$tp53_dir" \
  --alphas "$alphas" \
  --l1_ratios "$l1_mixing" \
  --keep_intermediate \
  --shuffled
###############
# Step 2. Within Disease type TP53 classification
###############
# Runs scripts/within_tissue_analysis.py for TP53 over the same disease list
# and hyperparameter lists as Step 1, with the within_disease subfolder of
# tp53_dir given as --alt_folder.
# The folder path is built inside one double-quoted word so it is always
# passed as a single argument.
python scripts/within_tissue_analysis.py \
  --genes 'TP53' \
  --diseases "$tp53_diseases" \
  --remove_hyper \
  --alt_folder "${tp53_dir}/within_disease" \
  --alphas "$alphas" \
  --l1_ratios "$l1_mixing"
###############
# Step 3. Get scores for all samples and visualize distribution of scores
###############
# Four scripts are run in sequence, each pointed at tp53_dir (the folder
# Step 1 was given as --alt_folder). The folder is double-quoted everywhere
# so it is passed as one argument regardless of its contents.
python scripts/apply_weights.py \
  --classifier "$tp53_dir" \
  --copy_number

python scripts/visualize_decisions.py \
  --scores "$tp53_dir" \
  --custom 'TP53_loss'

python scripts/map_mutation_class.py \
  --scores "$tp53_dir" \
  --genes 'TP53'

python scripts/copy_burden_merge.py \
  --classifier_folder "$tp53_dir"
###############
# Step 4. Plot additional TP53 results
###############
# Summary Figures
Rscript --vanilla scripts/viz/ddr_summary_figures.R

# Compare the within-disease results folder with the pan-cancer folder.
# Both paths are double-quoted so each is passed as a single argument.
Rscript --vanilla scripts/compare_within_models.R \
  --within_dir "${tp53_dir}/within_disease" \
  --pancan_summary "$tp53_dir"

# Notebooks to execute, in order. Each keeps the description that preceded
# it in the original script.
# NOTE(review): judging only by the file names, the two descriptions look
# swapped -- confirm against the notebooks before relying on them.
tp53_notebooks=(
  # Mutation classification stratified by cancer-Type
  scripts/tp53_phenocopy.ipynb
  # Mutation classification stratified by phenocopying variant
  scripts/tp53_ddr_cancertype_subtypes.ipynb
)

# Execute each notebook with the python3 kernel and convert it to HTML,
# using scripts as the FilesWriter build directory. The options are the
# same for both notebooks, so they are written once.
for tp53_notebook in "${tp53_notebooks[@]}"; do
  jupyter nbconvert --to=html \
    --FilesWriter.build_directory=scripts \
    --ExecutePreprocessor.kernel_name=python3 \
    --ExecutePreprocessor.timeout=100000 \
    --execute "$tp53_notebook"
done
###############
# Step 5. SNAPTRON exon-exon junction analysis
###############
# Run the snaptron script from inside its own directory. The subshell keeps
# the directory change local: the working directory of this script is the
# same afterwards however the step ends, so no "cd ../.." is needed. The cd
# is checked, so a missing directory can no longer cause the snaptron script
# to be looked up in the wrong place or leave later commands running two
# levels above the starting directory.
(
  cd scripts/snaptron || exit 1
  bash dna_damage_repair_tp53exon.sh
) || {
  # The next command needs this step's results, so stop here, not later.
  printf '%s\n' 'tp53_analysis.sh: SNAPTRON step failed; aborting' >&2
  exit 1
}

# Copy burden analysis requires snaptron results
Rscript --vanilla scripts/copy_burden_figures.R