Skip to content

Commit

Permalink
enh: support chunked recordings (e.g., sessions 11 and 13)
Browse the repository at this point in the history
  • Loading branch information
oesteban committed Oct 9, 2024
1 parent b5996b8 commit dc1fe3a
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 83 deletions.
15 changes: 8 additions & 7 deletions code/physioconv/acq2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,13 @@ def extract_signal(recording, src_file, out_path, channels, first_trigger_t, ses
recording_data[colname] = channels[name]["data"]

# Before session 23, calibration was a bit off
if session.startswith("pilot") or int(session) < RECALIBRATED_SESSION:
recording_data["CO2"]["data"] = (
recording_data["CO2"]["data"] * (8.0 - 0.045) / 0.8 + 0.045
if recording == "respiratory" and (session.startswith("pilot") or int(session) < RECALIBRATED_SESSION):
recording_data["CO2"] = (
recording_data["CO2"] * (8.0 - 0.045) / 0.8 + 0.045
)
if "O2" in recording_data:
recording_data["O2"]["data"] = (
recording_data["CO2"]["data"] - 0.1
recording_data["O2"] = (
recording_data["CO2"] - 0.1
) * 10.946 / (20.946 + 0.1) + 10

sidecar["Columns"] = list(recording_data.keys())
Expand Down Expand Up @@ -235,7 +235,7 @@ def convert(
out_files = []
for recording in ("cardiac", "respiratory"):
out_files.append(
extract_signal(recording, src_file, out_path, channels, first_trigger_t)
extract_signal(recording, src_file, out_path, channels, first_trigger_t, session)
)


Expand Down Expand Up @@ -289,7 +289,8 @@ def parse_args():
src_files = sorted(
args.data_path.glob(f"sub-{args.participant}_ses-{args.session}_*_physio.hdf5")
)
if src_files and args.overwrite:

if not src_files or args.overwrite:
src_files = extract(
args.participant, args.session, args.data_path, args.bids_path
)
Expand Down
49 changes: 29 additions & 20 deletions code/physioconv/physio-to-bids.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,10 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"id": "a7d38bbd",
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "cannot import name '_channel_id' from 'convert' (/data/home/oesteban/workspace/hcph-sops/code/physioconv/convert.py)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [5]\u001b[0m, in \u001b[0;36m<cell line: 13>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatetime\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m timezone, timedelta\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcollections\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m defaultdict\n\u001b[0;32m---> 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconvert\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _channel_id, _gen_timeseries, get_1st_trigger_time\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mh5py\u001b[39;00m\n\u001b[1;32m 17\u001b[0m plt\u001b[38;5;241m.\u001b[39mrcParams[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfigure.figsize\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m20\u001b[39m, \u001b[38;5;241m2.5\u001b[39m)\n",
"\u001b[0;31mImportError\u001b[0m: cannot import name '_channel_id' from 'convert' (/data/home/oesteban/workspace/hcph-sops/code/physioconv/convert.py)"
]
}
],
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
Expand All @@ -45,7 +33,14 @@
"from datetime import timezone, timedelta\n",
"from collections import defaultdict\n",
"\n",
"from convert import _channel_id, _gen_timeseries, get_1st_trigger_time\n",
"from acq2bids import (\n",
" _channel_id,\n",
" _gen_timeseries,\n",
" get_1st_trigger_time,\n",
" RECALIBRATED_SESSION,\n",
" FIRST_O2_SESSION,\n",
" MISSING_RB,\n",
")\n",
"\n",
"import h5py\n",
"\n",
Expand Down Expand Up @@ -73,9 +68,6 @@
"source": [
"DATA_PATH = Path(\"/data/datasets/hcph-pilot-sourcedata/recordings/BIOPAC\")\n",
"BIDS_PATH = Path(\"/data/datasets/hcph\")\n",
"RECALIBRATED_SESSION = 23\n",
"FIRST_O2_SESSION = 11 # The cable to record O2 signal has been received midway through the acquisition\n",
"MISSING_RB = (\"excl029\", )\n",
"\n",
"participant = \"001\"\n",
"session = \"001\""
Expand Down Expand Up @@ -110,10 +102,27 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 4,
"id": "c576a79b",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] Unable to open file (unable to open file: name = '/data/datasets/hcph-pilot-sourcedata/recordings/BIOPAC/sub-001_ses-001_acq-reliability_dir-LR_physio.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [4]\u001b[0m, in \u001b[0;36m<cell line: 14>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m out_path \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 6\u001b[0m BIDS_PATH\n\u001b[1;32m 7\u001b[0m \u001b[38;5;241m/\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msub-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparticipant\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;241m/\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mses-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msession\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;241m/\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfunc\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtask-\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m src_file\u001b[38;5;241m.\u001b[39mname \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdwi\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 10\u001b[0m )\n\u001b[1;32m 12\u001b[0m channels \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mh5py\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFile\u001b[49m\u001b[43m(\u001b[49m\u001b[43msrc_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m h5f:\n\u001b[1;32m 15\u001b[0m start_recording \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime64(\n\u001b[1;32m 16\u001b[0m 
h5f\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart_recording\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mrsplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m+\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 17\u001b[0m )\n\u001b[1;32m 18\u001b[0m start_run \u001b[38;5;241m=\u001b[39m h5f\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart_run\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[0;32m~/.miniconda/lib/python3.9/site-packages/h5py/_hl/files.py:533\u001b[0m, in \u001b[0;36mFile.__init__\u001b[0;34m(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, fs_strategy, fs_persist, fs_threshold, fs_page_size, page_buf_size, min_meta_keep, min_raw_keep, locking, alignment_threshold, alignment_interval, **kwds)\u001b[0m\n\u001b[1;32m 525\u001b[0m fapl \u001b[38;5;241m=\u001b[39m make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,\n\u001b[1;32m 526\u001b[0m locking, page_buf_size, min_meta_keep, min_raw_keep,\n\u001b[1;32m 527\u001b[0m alignment_threshold\u001b[38;5;241m=\u001b[39malignment_threshold,\n\u001b[1;32m 528\u001b[0m alignment_interval\u001b[38;5;241m=\u001b[39malignment_interval,\n\u001b[1;32m 529\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[1;32m 530\u001b[0m fcpl \u001b[38;5;241m=\u001b[39m make_fcpl(track_order\u001b[38;5;241m=\u001b[39mtrack_order, fs_strategy\u001b[38;5;241m=\u001b[39mfs_strategy,\n\u001b[1;32m 531\u001b[0m fs_persist\u001b[38;5;241m=\u001b[39mfs_persist, fs_threshold\u001b[38;5;241m=\u001b[39mfs_threshold,\n\u001b[1;32m 532\u001b[0m fs_page_size\u001b[38;5;241m=\u001b[39mfs_page_size)\n\u001b[0;32m--> 533\u001b[0m fid \u001b[38;5;241m=\u001b[39m \u001b[43mmake_fid\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muserblock_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfapl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfcpl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mswmr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mswmr\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(libver, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[1;32m 536\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_libver \u001b[38;5;241m=\u001b[39m libver\n",
"File \u001b[0;32m~/.miniconda/lib/python3.9/site-packages/h5py/_hl/files.py:226\u001b[0m, in \u001b[0;36mmake_fid\u001b[0;34m(name, mode, userblock_size, fapl, fcpl, swmr)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m swmr \u001b[38;5;129;01mand\u001b[39;00m swmr_support:\n\u001b[1;32m 225\u001b[0m flags \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m h5f\u001b[38;5;241m.\u001b[39mACC_SWMR_READ\n\u001b[0;32m--> 226\u001b[0m fid \u001b[38;5;241m=\u001b[39m \u001b[43mh5f\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfapl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfapl\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr+\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 228\u001b[0m fid \u001b[38;5;241m=\u001b[39m h5f\u001b[38;5;241m.\u001b[39mopen(name, h5f\u001b[38;5;241m.\u001b[39mACC_RDWR, fapl\u001b[38;5;241m=\u001b[39mfapl)\n",
"File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mh5py/h5f.pyx:106\u001b[0m, in \u001b[0;36mh5py.h5f.open\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] Unable to open file (unable to open file: name = '/data/datasets/hcph-pilot-sourcedata/recordings/BIOPAC/sub-001_ses-001_acq-reliability_dir-LR_physio.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)"
]
}
],
"source": [
"src_file = DATA_PATH / f\"sub-{participant}_ses-{session}_acq-reliability_dir-LR_physio.hdf5\"\n",
"# src_file = DATA_PATH / f\"sub-{participant}_ses-{session}_task-bht_dir-LR_physio.hdf5\"\n",
Expand Down
125 changes: 69 additions & 56 deletions code/physioconv/splitruns.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _get_length(filepath: Path) -> float:
sidecar = filepath.parent / sidecar_fname
meta = loads(sidecar.read_text())
if "RepetitionTime" not in meta:
global_name = sidecar.name.split("_", 2)[-1]
global_name = sidecar.name.replace("_run-1", "").replace("_run-2", "").split("_", 2)[-1]
meta |= loads((sidecar.parents[3] / global_name).read_text())

size = nb.load(filepath).shape[-1]
Expand Down Expand Up @@ -124,63 +124,76 @@ def main(
)

# Extract AcqKnowledge metadata
physio_path = scans_row.physio_files.values[0].split(",")[0]
session_data = read_file(str(data_path / physio_path))
recording_start = np.datetime64(
str(
session_data.event_markers[0]
.date_created_utc.replace(tzinfo=timezone.utc)
.astimezone(tz=None)
).split("+")[0]
)

# Calculate onsets and offsets
run_onsets = (bids_start - recording_start) / np.timedelta64(1, "s")
run_offsets = (bids_start - recording_start + bids_lengths) / np.timedelta64(1, "s")

channels = session_data.channels

clip_onsets = [0] + (run_onsets[1:] - 5.0).tolist()
clip_offsets = (run_onsets[1:] - 1.0).tolist() + [-1]

out_files = []
for run_id, (clip_on, clip_off) in enumerate(zip(clip_onsets, clip_offsets)):
run_name = (
Path(run_names[run_id])
.name.replace("_echo-1_part-mag", "")
.rsplit("_", 1)[0]
num_runs = len(run_names)
run_id = 0
for physio_path in scans_row.physio_files.values[0].split(","):
acq_session = read_file(str(data_path / physio_path))
acq_start = np.datetime64(
str(
acq_session.event_markers[0]
.date_created_utc.replace(tzinfo=timezone.utc)
.astimezone(tz=None)
).split("+")[0]
)
acq_stop = (
len(acq_session.channels[4].time_index)
/ acq_session.channels[4].samples_per_second
)
h5_filename = data_path / f"{run_name}_physio.hdf5"
with h5py.File(h5_filename, "w") as h5f:
h5f.attrs["start_recording"] = np.datetime_as_string(
recording_start,
timezone="local",
).astype("S30")
h5f.attrs["start_run"] = run_onsets[run_id]
h5f.attrs["stop_run"] = run_offsets[run_id]

for i, ch in enumerate(channels):
onset_index = (
0 if clip_on == 0 else np.abs(ch.time_index - clip_on).argmin()
)
offset_index = (
-1 if clip_off == -1 else np.abs(ch.time_index - clip_off).argmin()
)

ch_group = h5f.create_group(f"channel_{i}")
ch_group.attrs["name"] = ch.name
ch_group.attrs["units"] = ch.units
ch_group.attrs["frequency"] = ch.samples_per_second
ch_group.attrs["start_time"] = round(ch.time_index[onset_index], 6)
ch_group.create_dataset(
"data",
data=ch.data[onset_index:offset_index],
compression="gzip",
compression_opts=9,
)

print(f"Generated: {h5_filename}")
out_files.append(h5_filename)
# Calculate onsets and offsets (time limits)
run_tlims = np.tile(bids_start[run_id:], (2, 1)) - acq_start
run_tlims[1, :] += bids_lengths[run_id:]
run_tlims = run_tlims / np.timedelta64(1, "s")
run_tlims = run_tlims[:, run_tlims[1, :] < acq_stop].T
run_tlims[run_tlims[:, 0] < 0, 0] = 0.0

clip_onsets = [0] + (run_tlims[1:, 0] - 5.0).tolist()
clip_offsets = (run_tlims[1:, 0] - 1.0).tolist() + [-1]

channels = acq_session.channels

out_files = []
for chunk_i, (clip_on, clip_off) in enumerate(zip(clip_onsets, clip_offsets)):
run_name = (
Path(run_names[run_id])
.name.replace("_echo-1_part-mag", "")
.rsplit("_", 1)[0]
)
h5_filename = data_path / f"{run_name}_physio.hdf5"
with h5py.File(h5_filename, "w") as h5f:
h5f.attrs["start_recording"] = np.datetime_as_string(
acq_start,
timezone="local",
).astype("S30")
h5f.attrs["start_run"] = run_tlims[chunk_i, 0]
h5f.attrs["stop_run"] = run_tlims[chunk_i, 1]

for channel_i, ch in enumerate(channels):
onset_index = (
0 if clip_on == 0 else np.abs(ch.time_index - clip_on).argmin()
)
offset_index = (
-1 if clip_off == -1 else np.abs(ch.time_index - clip_off).argmin()
)

ch_group = h5f.create_group(f"channel_{channel_i}")
ch_group.attrs["name"] = ch.name
ch_group.attrs["units"] = ch.units
ch_group.attrs["frequency"] = ch.samples_per_second
ch_group.attrs["start_time"] = round(ch.time_index[onset_index], 6)
ch_group.create_dataset(
"data",
data=ch.data[onset_index:offset_index],
compression="gzip",
compression_opts=9,
)

print(f"Generated: {h5_filename}")
out_files.append(h5_filename)
run_id += 1

if run_id == num_runs:
break

return out_files

Expand Down

0 comments on commit dc1fe3a

Please sign in to comment.