#!/bin/bash
#
# This script runs the longitudinal FreeSurfer pipeline for all subjects in
# the current directory.
#
# It expects the following input:
# - For each subject, you need to have MRI data that has already been pre-processed
# in FreeSurfer with the standard (cross-sectional) recon-all command for both
# timepoints. The 2 recon-all output directories must be named <subjectid>_MR1 and
# <subjectid>_MR2. The '_MR1' and '_MR2' part at the end is mandatory!
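#
# For example, with a hypothetical subject ID 'subject1', the directory you run this
# script from would contain:
#
#   subject1_MR1/    # cross-sectional recon-all output for timepoint 1
#   subject1_MR2/    # cross-sectional recon-all output for timepoint 2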
#
# See the comments above each part below for the output this script produces.
#
#
# Written by Tim Schaefer, 2021-10-06
#
#
############## USAGE INFORMATION
# 1) Make sure GNU parallel is installed and adapt the number of cores below.
# 2) Run this script without any arguments, from the directory containing the input prepared as described above, to see the help.
#
# The full pipeline takes several hours per subject, depending on which parts you run; see the comments below.
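#
# Example invocations (run from the directory containing your subject directories):
#   ./fs_longitudinal_pipeline.bash subjects.txt                             # use a subjects file
#   ./fs_longitudinal_pipeline.bash infer                                    # auto-detect subjects from the *_MR1 directories
#   ./fs_longitudinal_pipeline.bash subjects.txt ./qdec/long.qdec.table.dat  # also name the QDEC table for Part III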
##################### Settings -- adapt to your system and needs ################
do_run_part1="NO"
do_run_part2="NO"
do_run_part3="YES" # Do not forget to set the 'measures' to compute below.
do_run_part3_in_parallel="YES" # Whether to use the highly parallel version of part 3. This is much faster on multi-core systems and is recommended.
nodes=44 # The number of CPU cores to use. Adapt to your machine.
## The following settings are used in part3 only, you can ignore them if you do not run that part.
python2_command="python2" # The command that calls a Python 2 interpreter (not Python 3, which is what the command 'python' often points to on modern systems).
measures="thickness" # <=== Adapt this. Can be a list, e.g., measures="thickness area volume".
############################## End of settings #################################
export SUBJECTS_DIR=$(pwd)
apptag="[FS_LONG]"
echo "$apptag Running in directory '$SUBJECTS_DIR' with '$nodes' CPU cores."
## You may want to run the parts below one after the other and check
## intermediate results manually in between.
##### Determine for which subjects to run.
subjects_file="$1"
if [ -z "$subjects_file" ]; then
echo "USAGE: $0 <subjects_file> | infer [<qdec_table>]"
echo " <subjects_file> : path to a text file containing 1 subject per line. (The file cannot be named 'infer', see below.) Used in Part I and II."
echo " infer : alternative to specifying the subjects file, this keyword tells the script to auto-detect subjects in directory from _MR1 prefixes."
echo " <qdec_table> : optional, the QDEC table file to use in Part III. Defaults to './qdec/long.qdec.table.dat'. Used in Part III only."
echo "NOTE: You may want to adapt some settings in the script header, e.g., which parts to run and what measures to compute slopes for in part III."
exit 1
fi
qdec_file="$2"
if [ -z "$qdec_file" ]; then
qdec_file="./qdec/long.qdec.table.dat"
fi
if [ "$do_run_part1" = "YES" -o "$do_run_part2" = "YES" ]; then
if [ "$subjects_file" = "infer" ]; then
subjects_by_line=$(ls -d *_MR1 | sed 's/_MR1$//') # Strip the '_MR1' suffix. (Using 'cut -d _' here would break subject IDs that themselves contain underscores, e.g., OAS2_0001.)
else
if [ ! -f "$subjects_file" ]; then
echo "$apptag ERROR: Subjects file '$subjects_file' does not exist or cannot be read."
exit 1
fi
subjects_by_line=$(cat "$subjects_file")
# check for subject directories
subjects_list=$(echo "$subjects_by_line" | tr '\n' ' ')
echo "$apptag Subjects list: $subjects_list"
for subject in $subjects_list; do
sjd="${SUBJECTS_DIR}/${subject}"
if [ ! -d "$sjd" ]; then
echo "$apptag ERROR: The directory for subject $subject does not exist at expected path '$sjd'. Please check your subjects file."
exit 1
fi
done
fi
num_subjects=$(echo "${subjects_by_line}" | wc -l | tr -d '[:space:]')
fi
################################################################################
############################### Part I #########################################
################################################################################
## Create within-subject template.
## This produces the directory '<subjectid>' (the base template) from '<subjectid>_MR1' and '<subjectid>_MR2'.
## It takes a bit longer than a typical full recon-all cross-sectional run. You should expect this to run very roughly 16 hours per subject (on a single core), depending on your hardware.
if [ "${do_run_part1}" = "YES" ]; then
echo "$apptag Running part I: Creation of within-subject template for $num_subjects subjects. This is gonna take a while..."
echo "subjects: $subjects_by_line"
echo "$subjects_by_line" | parallel -S ${nodes}/: --workdir . --joblog template.log "recon-all -base {} -tp {}_MR1 -tp {}_MR2 -all -no-isrunning"
else
echo "$apptag SKIPPING part I as requested"
fi
################################################################################
############################## Part II #########################################
################################################################################
## Run longitudinal pipeline for MR1 and MR2
## This produces the directory '<subjectid>_MR1.long.<subjectid>'.
## This takes about 5 hours per subject per run (5h for _MR1, and another 5h for _MR2)
if [ "${do_run_part2}" = "YES" ]; then
echo "$apptag Running part II: Longitudinal pipeline for $num_subjects subjects."
echo "$apptag - Running step IIa: Handling timepoint MR1"
echo "$subjects_by_line" | parallel -S ${nodes}/: --workdir . --joblog MR1_longitudinal.log "recon-all -long {}_MR1 {} -all -no-isrunning"
## This produces the directory '<subjectid>_MR2.long.<subjectid>'.
echo "$apptag - Running step IIb: Handling timepoint MR2"
echo "$subjects_by_line" | parallel -S ${nodes}/: --workdir . --joblog MR2_longitudinal.log "recon-all -long {}_MR2 {} -all -no-isrunning"
else
echo "$apptag SKIPPING part II as requested"
fi
################################################################################
############################## Part III ########################################
################################################################################
## Produces the slopes for your measure of interest (e.g., cortical thickness) between the timepoints.
## This requires the file 'long.qdec.table.dat', which you will need to create based on the subject demographics (inter-scan period, etc). There is an
## R function in the 'fsbrain' R package that can make it a lot easier (and less error-prone) to create that file. The function is: fsbrain::demographics.to.qdec.table.dat()
##
## See https://surfer.nmr.mgh.harvard.edu/fswiki/LongitudinalTwoStageModel for information on the table. That table is
## a space-separated, CSV-like text file with columns 'fsid', 'fsid-base' and 'years'. E.g., the first 5 lines could look like this (without the bash comment signs '##' at the start):
##
## fsid fsid-base years
## OAS2_0001_MR1 OAS2_0001 0
## OAS2_0001_MR2 OAS2_0001 1.25
## OAS2_0002_MR1 OAS2_0002 0
## OAS2_0002_MR2 OAS2_0002 1.75
##
## More columns are allowed, but these columns (in that order!) must appear first with EXACTLY the column names given above.
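##
## A quick sanity check of the table header before running (a minimal sketch; adjust the
## path if your table lives elsewhere):
##   head -n 1 ./qdec/long.qdec.table.dat   # should start with: fsid fsid-base years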
##
## This takes about 1 minute per hemisphere per measure per subject (so 2 minutes per subject if you only want
## the single measure 'thickness' in the loop below). A single long_mris_slopes run cannot be parallelized over
## subjects, because the tool processes the whole QDEC table sequentially. The parallel version below works around this by splitting the table into one table per subject; the sequential version at least parallelizes over the 2 hemis.
##
## IMPORTANT: A few words of warning regarding long_mris_slopes:
## 1) A single invocation cannot easily be run in parallel as it works on the whole subject list; the sequential variant
##    therefore only parallelizes over hemis, while the parallel variant splits the QDEC table per subject first (see above).
## 2) It requires Python 2, while 'python' points to Python 3 on modern systems, so we manually call it with python2 and the full
##    path to the script below. You may need to adapt the paths to your system.
## 3) Most importantly: it will STOP if it encounters an issue with a single subject, so all subjects after that one in the list
##    will not be handled at all (for that hemi, as we run the 2 hemis independently via GNU parallel).
if [ "${do_run_part3}" = "YES" ]; then
#long_mris_slopes_command="long_mris_slopes"
if [ -z "$FREESURFER_HOME" ]; then
echo "$apptag Env var FREESURFER_HOME not set, please setup FreeSurfer properly."
exit 1
fi
long_mris_slopes_bin="${FREESURFER_HOME}/bin/long_mris_slopes" # The full path to the 'long_mris_slopes' program that comes with FreeSurfer. If FreeSurfer is setup correctly for the bash shell on your system, you do not need to change anything.
long_mris_slopes_command="${python2_command} ${long_mris_slopes_bin}" # Typically evaluates to something like: python2 ${FREESURFER_HOME}/bin/long_mris_slopes
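# Optional sanity check: verify that the configured Python 2 interpreter is actually
# available before launching any jobs.
if ! command -v "${python2_command}" > /dev/null 2>&1; then
echo "$apptag ERROR: Python 2 interpreter '${python2_command}' not found on the PATH. Please adapt the 'python2_command' setting in the script header."
exit 1
fi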
echo "$apptag Running part III: Computing slopes of measure data ($measures)."
qdec_file_time_column="years" # This is the name of the inter-scan time column in the QDEC file. The unit is up to you; it depends on what you want to measure.
## ... Yearly change seems reasonable, so we assume inter-scan time is given in years by default, listed in a column named 'years', as in the example above.
if [ -f "${qdec_file}" ]; then
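# Extract the unique base subject IDs from the 'fsid-base' column (column 2), skipping the
# header line. Plain 'uniq' suffices because the MR1/MR2 rows of each subject are adjacent.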
qdec_subjects=$(awk '(NR>1) {print $2}' "${qdec_file}" | uniq | tr '\n' ' ')
num_qdec_subjects=$(echo "${qdec_subjects}" | wc -w | tr -d '[:space:]')
subjects_missing_input=""
echo "$apptag Handling QDEC file containing $num_qdec_subjects subjects."
# Check whether the required input files exist to keep long_mris_slopes from crashing once it hits a subject which does not have them.
has_errors="no"
for subject in $qdec_subjects; do
subject_has_errors="no"
sjd_MR1_surf="${SUBJECTS_DIR}/${subject}_MR1.long.${subject}/surf"
sjd_MR2_surf="${SUBJECTS_DIR}/${subject}_MR2.long.${subject}/surf"
for measure in $measures; do
for hemi in lh rh; do
input_file_MR1="${sjd_MR1_surf}/${hemi}.${measure}"
if [ ! -f "$input_file_MR1" ]; then
echo "$APPTAG ERROR: Subject $subject does not even have the expected $hemi hemi measure $measure MR1 input file '$input_file_MR1' for computing slopes."
has_errors="yes"
subject_has_errors="yes"
fi
input_file_MR2="${sjd_MR2_surf}/${hemi}.${measure}"
if [ ! -f "$input_file_MR2" ]; then
echo "$APPTAG ERROR: Subject $subject does not even have the expected $hemi hemi measure $measure MR2 input file '$input_file_MR2' for computing slopes."
has_errors="yes"
subject_has_errors="yes"
fi
done
done
if [ "$subject_has_errors" = "yes" ]; then
subjects_missing_input="$subjects_missing_input $subject"
fi
done
if [ "$has_errors" = "yes" ]; then
num_subjects_missing_input=$(echo "${subjects_missing_input}" | wc -w | tr -d '[:space:]')
echo "$apptag ERROR: Detected missing input files, see above. Running long_mris_slopes would fail. Not starting the run."
echo "$apptag There are $num_subjects_missing_input of $num_qdec_subjects subjects missing required input files: $subjects_missing_input"
exit 1
fi
if [ "${do_run_part3_in_parallel}" = "YES" ]; then
# Split the QDEC table into 1 table per subject to allow running slopes in parallel over subjects.
exec_path_of_this_script=$(dirname "$0")
qdec_split_table_script="${exec_path_of_this_script}/qdec_split_table.bash"
if [ ! -x "${qdec_split_table_script}" ]; then
echo "ERROR: QDEC split table script not executable or cannot be read at '${qdec_split_table_script}'. Exiting."
exit 1
else # Run the table splitting. This produces one QDEC table per subject in the qdec_output_dir (see below).
"$qdec_split_table_script" "${qdec_file}"
fi
qdec_output_dir=$(dirname "$qdec_file") # This is where the qdec_split_table_script will put the split QDEC table files (one per subject).
# Check whether all expected tables exist. They are required for the GNU parallel command below.
for subject in $qdec_subjects; do
subject_qdec_table_file="${qdec_output_dir}/qdec_subject_table_${subject}.dat"
if [ ! -f "$subject_qdec_table_file" ]; then
echo "ERROR: The QDEC table for subject ${subject} was not created at '${subject_qdec_table_file}', cannot run long_mris_slopes. Exiting."
exit 1
fi
done
for measure in $measures; do
for hemi in lh rh; do
echo "$apptag - Part III: Computing slopes for $num_qdec_subjects subjects and hemi $hemi in parallel over subjects ($nodes cores) for measure '$measure' (of all measures '$measures')."
echo "$apptag * Using QDEC tables based on '$qdec_file' with time column '$qdec_file_time_column' (split into 1 table per subject)."
echo "$qdec_subjects" | tr ' ' '\n' | parallel -S ${nodes}/: --workdir . --joblog long_mris_slopes_${measure}_${hemi}.log "${long_mris_slopes_command} --qdec ${qdec_output_dir}/qdec_subject_table_{}.dat --meas $measure --hemi $hemi --do-avg --do-rate --do-pc1 --do-spc --do-stack --do-label --time ${qdec_file_time_column} --qcache fsaverage --sd $SUBJECTS_DIR"
done
done
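# After the run, check the joblogs written above: GNU parallel records each job's exit
# value in the 'Exitval' column, so non-zero entries point to subjects that failed.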
else # NOT in parallel over subjects.
# We compute the 2 hemispheres in parallel for each measure, so we do not need to loop over hemis here.
for measure in $measures; do
echo "$apptag - Part III: Computing slopes for $num_qdec_subjects subjects and both hemis in parallel over hemis (using 2 cores) for measure '$measure' (of all measures '$measures')."
echo "$apptag * Using QDEC table '$qdec_file' with time column '$qdec_file_time_column'."
echo "lh rh" | tr ' ' '\n' | parallel -S ${nodes}/: --workdir . --joblog long_mris_slopes_$measure.log "${long_mris_slopes_command} --qdec ${qdec_file} --meas $measure --hemi {} --do-avg --do-rate --do-pc1 --do-spc --do-stack --do-label --time ${qdec_file_time_column} --qcache fsaverage --sd $SUBJECTS_DIR"
done
fi
else
echo "$apptag ERROR: The QDEC table file '${qdec_file}' does not exist or cannot be read: cannot produce measure data. Please create it and manually re-run step III of the script only."
echo "$apptag Note: one can use the fsbrain R package to create the file from a demographics data.frame, see functions qdec.table.skeleton() and demographics.to.qdec.table.dat()."
exit 1
fi
else
echo "$apptag SKIPPING part III as requested"
fi
exit 0