enh: robustify physio to BIDS conversion
Showing 5 changed files with 804 additions and 1,793 deletions.
@@ -0,0 +1,298 @@
# Copyright 2024 The Axon Lab <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# We support and encourage derived works from this project, please read
# about our expectations at
#
# https://www.nipreps.org/community/licensing/
#

from __future__ import annotations

import argparse
from pathlib import Path
from json import loads, dumps
from sys import exit
import numpy as np
import pandas as pd
import h5py

from splitruns import DATA_PATH, BIDS_PATH, parse_session, main as extract

RECALIBRATED_SESSION = 23
# The cable to record the O2 signal was received midway through the acquisition period
FIRST_O2_SESSION = 11
MISSING_RB = ("excl029",)


def _channel_id(channel_name):
    name_lower = channel_name.lower()
    if name_lower == "scanner ttl":
        return "trigger"

    # Digital channels: channel 0 carries the scanner trigger,
    # while channels 1..n map onto stim0..stim(n-1)
    if name_lower.startswith("digital") or "stp input" in name_lower:
        channel_num = int(channel_name.strip()[-2])
        return f"stim{channel_num - 1}" if channel_num > 0 else "trigger"

    if name_lower.startswith("card") or name_lower == "ecg" or "eeg100c" in name_lower:
        return "cardiac"

    if name_lower.startswith("resp") or name_lower == "rb" or "tsd160a" in name_lower:
        return "respiratory0"

    if name_lower.startswith("co2"):
        return "respiratory1"

    if name_lower.startswith("o2"):
        return "respiratory2"

    raise ValueError(f"Unknown channel name '{channel_name}'")
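
# Illustrative mappings for the helper above (channel names are hypothetical
# examples, not taken from an actual AcqKnowledge export):
#   "Scanner TTL" -> "trigger"
#   "ECG"         -> "cardiac"
#   "RB"          -> "respiratory0"
#   "CO2"         -> "respiratory1"
#   "O2"          -> "respiratory2"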


def _gen_timeseries(ch, offset=0.0):
    # One timepoint per sample; endpoint=False keeps the step at exactly
    # one sampling period (1 / frequency)
    return (
        np.linspace(
            ch["start_time"],
            len(ch["data"]) / ch["frequency"] + ch["start_time"],
            num=len(ch["data"]),
            endpoint=False,
        )
        - offset
    )
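
# Worked example (hypothetical numbers): start_time=2.0 s, frequency=4 Hz, and
# 8 samples yield [2.0, 2.25, 2.5, ..., 3.75] before subtracting the offset.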


def get_1st_trigger_time(channels, start_run):
    ch = channels["trigger"]
    trigger_data = ch["data"]

    timeseries = _gen_timeseries(ch)

    # Find the first nonzero trigger sample within ~1 s around the run's start
    index_left = np.abs(timeseries - max(ch["start_time"], start_run - 1)).argmin()
    index_right = np.abs(timeseries - (start_run + 1)).argmin() + 1
    first_trigger = (trigger_data[index_left:index_right] > 1e-5).argmax(axis=0)

    # Align the timeseries with the first trigger
    first_trigger_t = timeseries[index_left:index_right][first_trigger]
    timeseries -= first_trigger_t
    ch["timeseries"] = timeseries
    return timeseries, first_trigger_t


def extract_signal(recording, src_file, out_path, channels, first_trigger_t, session):
    # Generate BIDS name
    recording_filepath = out_path / src_file.name.replace(
        "_physio.hdf5", f"_recording-{recording}_physio.tsv.gz"
    )

    # Generate the time axis
    channel_names = sorted(
        [key for key in channels.keys() if key.startswith(recording)]
    )

    # All channels of a recording MUST have the same frequency and hence share one time axis
    timeseries = _gen_timeseries(channels[channel_names[0]], offset=first_trigger_t)
    for name in channel_names:
        channels[name]["timeseries"] = timeseries

    # Prepare metadata
    sidecar = loads(Path(f"defaults_{recording}.json").read_text()).copy()
    sidecar.update(
        {
            "SamplingFrequency": channels[channel_names[0]]["frequency"],
            "StartTime": timeseries[0],
        }
    )

    recording_data = {}  # Prepare dataframe
    for colname, name in zip(sidecar["Columns"], channel_names):
        sidecar[colname]["Units"] = channels[name]["units"]
        recording_data[colname] = channels[name]["data"]

    # Before RECALIBRATED_SESSION (ses-23), the gas analyzer's calibration was a bit off
    if session.startswith("pilot") or int(session) < RECALIBRATED_SESSION:
        if "CO2" in recording_data:
            recording_data["CO2"] = recording_data["CO2"] * (8.0 - 0.045) / 0.8 + 0.045
        if "O2" in recording_data:
            recording_data["O2"] = (
                recording_data["O2"] - 0.1
            ) * 10.946 / (20.946 + 0.1) + 10

    sidecar["Columns"] = list(recording_data.keys())

    # We can store data now (BIDS physio TSVs carry neither header nor index)
    pd.DataFrame(recording_data).to_csv(
        recording_filepath,
        compression="gzip",
        header=False,
        index=False,
        sep="\t",
        na_rep="n/a",
    )
    print(f"Recording updated: {recording_filepath}")

    # And metadata
    sidecar_path = recording_filepath.parent / recording_filepath.name.replace(
        ".tsv.gz", ".json"
    )
    sidecar_path.write_text(dumps(sidecar, indent=2))
    print(f"Sidecar JSON updated: {sidecar_path}")

    return recording_filepath


def convert(
    src_file: Path,
    bids_path: Path = BIDS_PATH,
):
    # Preparations
    src_fname = src_file.name.split("_", 2)[:2]
    participant = src_fname[0][4:]
    session = src_fname[1][4:]

    out_path = (
        bids_path
        / f"sub-{participant}"
        / f"ses-{session}"
        / ("func" if "task-" in src_file.name else "dwi")
    )

    # Get data from split run (HDF5)
    channels = {}
    with h5py.File(src_file, "r") as h5f:
        start_recording = np.datetime64(
            h5f.attrs["start_recording"].decode("utf-8").rsplit("+", 1)[0]
        )
        start_run = h5f.attrs["start_run"]
        stop_run = h5f.attrs["stop_run"]

        for key in h5f.keys():
            metadata = h5f[key].attrs
            channel_id = _channel_id(metadata["name"])
            channels[channel_id] = dict(metadata.items())
            channels[channel_id]["num"] = int(key.split("_")[-1])
            channels[channel_id]["data"] = h5f[key]["data"][()]

    if session.startswith("pilot") or int(session) < FIRST_O2_SESSION:
        channels.pop("respiratory2", None)
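
    # Layout of the split-run HDF5 file, as read above (names illustrative):
    #   attrs: start_recording, start_run, stop_run
    #   <channel>_<num>/
    #       attrs: name, frequency, start_time, units, ...
    #       data:  the 1-D signal array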

    # Extract & store first trigger
    _, first_trigger_t = get_1st_trigger_time(channels, start_run)
    trigger_filepath = out_path / src_file.name.replace("_physio.hdf5", "_stim.tsv.gz")

    columns = ["trigger"] + sorted(
        [_ch for _ch in channels.keys() if _ch.startswith("stim")]
    )
    trigger_data = {name: channels[name]["data"] for name in columns}

    trigger_sidecar = {
        "SamplingFrequency": channels["trigger"]["frequency"],
        "StartTime": channels["trigger"]["timeseries"][0],
        "Columns": columns,
        "Manufacturer": "BIOPAC Systems, Inc., Goleta, CA, US",
    }

    for col in columns:
        trigger_sidecar[col] = {
            "Description": f"Pulse signal [{channels[col]['name']}] generated with PsychoPy",
            "Units": channels[col]["units"],
            "Model": "STP100D",
        }

    trigger_sidecar["trigger"]["Description"] = (
        f"Scanner trigger signal [{channels['trigger']['name']}]."
    )

    if out_path.name == "dwi":
        trigger_sidecar["trigger"]["Description"] += (
            " IMPORTANT! The DWI sequence sends triggers during calibration."
            " Therefore, a total of 203 trigger pulses SHOULD be discarded at the beginning of the run"
            " (corresponding to 2 x 87 slices in single-slice mode, plus 29 for one multi-slice volume)."
        )

    (
        trigger_filepath.parent / trigger_filepath.name.replace(".tsv.gz", ".json")
    ).write_text(dumps(trigger_sidecar, indent=2))

    # BIDS stim TSVs are written without header or index column
    pd.DataFrame(trigger_data).to_csv(
        trigger_filepath,
        compression="gzip",
        header=False,
        index=False,
        sep="\t",
        na_rep="n/a",
    )

    out_files = []
    for recording in ("cardiac", "respiratory"):
        out_files.append(
            extract_signal(
                recording, src_file, out_path, channels, first_trigger_t, session
            )
        )

    return out_files


def parse_args():
    """
    Parse command line arguments.

    Returns
    -------
    argparse.Namespace
        Parsed command line arguments.
    """
    parser = argparse.ArgumentParser(
        description="Process physiological data for a BIDS dataset."
    )
    parser.add_argument("participant", type=str, help="Participant ID")
    parser.add_argument(
        "session",
        type=parse_session,
        help="Session identifier (can be an integer or string).",
    )
    parser.add_argument(
        "--data_path",
        type=Path,
        default=DATA_PATH,
        help="Path to BIOPAC .acq files (default: %(default)s)",
    )
    parser.add_argument(
        "--bids_path",
        type=Path,
        default=BIDS_PATH,
        help="Path to BIDS dataset (default: %(default)s)",
    )
    parser.add_argument(
        "--overwrite", action="store_true", help="Overwrite existing files."
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    if not (
        session_path := args.bids_path
        / f"sub-{args.participant}"
        / f"ses-{args.session}"
    ).exists():
        print(f"Skipping {session_path} (missing).")
        exit(1)

    src_files = sorted(
        args.data_path.glob(f"sub-{args.participant}_ses-{args.session}_*_physio.hdf5")
    )
    # (Re-)run the splitting step when no split files exist yet or --overwrite was given
    if not src_files or args.overwrite:
        src_files = extract(
            args.participant, args.session, args.data_path, args.bids_path
        )

    for chunk in src_files:
        convert(chunk, args.bids_path)
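
A minimal usage sketch for the new module (the filenames and paths below are hypothetical, not part of this commit):

    from pathlib import Path

    # Participant and session are parsed from the BIDS-style filename
    # ("sub-001" -> "001", "ses-015" -> "015"); the output lands under
    # <bids_path>/sub-001/ses-015/func/ because the name contains "task-".
    convert(
        Path("/data/physio/sub-001_ses-015_task-rest_physio.hdf5"),
        bids_path=Path("/data/bids"),
    )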
@@ -0,0 +1,10 @@
{
  "Columns": [
    "ecg"
  ],
  "Manufacturer": "BIOPAC Systems, Inc., Goleta, CA, US",
  "ecg": {
    "Description": "continuous measurements of Lead I electrocardiogram",
    "Model": "ECG100C MRI & MECMRI-2 amplifier"
  }
}
@@ -0,0 +1,24 @@
{
  "Columns": [
    "belt",
    "CO2",
    "O2"
  ],
  "Manufacturer": "BIOPAC Systems, Inc., Goleta, CA, US",
  "belt": {
    "Description": "continuous breathing measurement by negative differential pressure with a respiration belt",
    "Model": "DA100C TSD160A transducer"
  },
  "CO2": {
    "Description": "continuous breathing measurement: absolute CO2 concentration",
    "Units": "%",
    "Model": "ML206",
    "Manufacturer": "AD Instruments Pty. Ltd., Sydney, Australia"
  },
  "O2": {
    "Description": "continuous breathing measurement: absolute O2 concentration",
    "Units": "%",
    "Model": "ML206",
    "Manufacturer": "AD Instruments Pty. Ltd., Sydney, Australia"
  }
}
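
At run time, extract_signal layers SamplingFrequency, StartTime, and per-channel Units on top of these defaults before writing the sidecar. A sketch of a resulting recording-respiratory sidecar (values are hypothetical):

    {
      "SamplingFrequency": 5000.0,
      "StartTime": -1.25,
      "Columns": ["belt", "CO2", "O2"],
      "Manufacturer": "BIOPAC Systems, Inc., Goleta, CA, US",
      "belt": {"Units": "V", ...},
      "CO2": {"Units": "%", ...},
      "O2": {"Units": "%", ...}
    }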