Commit

Initial commit of real data experiments
emdupre committed Jan 9, 2024
1 parent 822695b commit a46ab06
Showing 5 changed files with 474 additions and 0 deletions.
Binary file added bayes_ca/data/AG_mask.nii.gz
Binary file not shown.
1 change: 1 addition & 0 deletions bayes_ca/data/__init__.py
@@ -0,0 +1 @@
from .utils import ANG_ROI_FILE_PATH, bbt_data, sherlock_data, naturalistic_data
179 changes: 179 additions & 0 deletions bayes_ca/data/bbt-proc.py
@@ -0,0 +1,179 @@
import re
import glob
import warnings
import numpy as np
import nibabel as nib
from pathlib import Path
from nilearn import image, maskers


def _get_segment(files):
"""
Gets the segment of the movie (as encoded in the task
value) for sorting. Since some segments were shown out
of order, this is more reliable than a naive sort of
the filenames.
Parameters
----------
files : list of str
The list of filenames that should be sorted.
task : str
The name of the task, not encoding the segment.
For example, if the task value is 'figures04'
this string should be 'figures'.
Returns
-------
segments: list of str
The list of filenames, sorted by movie segment.
"""
segments = sorted(files, key=lambda x: int(re.search(r"run-(\d+)", x).group(1)))
return segments
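
# A hedged usage sketch (hypothetical filenames, not part of this commit):
# keying the sort on the run-<N> token keeps numeric order where a naive
# sort of the filenames would place "run-10" before "run-2".
# >>> _get_segment(["sub-01_task-bbt_run-10_bold.nii.gz",
# ...               "sub-01_task-bbt_run-2_bold.nii.gz"])
# ['sub-01_task-bbt_run-2_bold.nii.gz', 'sub-01_task-bbt_run-10_bold.nii.gz']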


def _nifti_mask_movie(scan, mask, confounds, smoothing_fwhm=None):
"""
Cleans movie data, including standardizing and high-pass
filtering at 0.007Hz. Corrects for supplied motion confounds
and calculates and corrects for top 5 tCompCor confounds.
Optionally smooths time series.
Parameters
----------
scan: niimg_like
An in-memory niimg
mask: str or Niimg-like
The (brain) mask within which to process data.
confounds: np.ndarray
Any confounds to correct for in the cleaned data set.
smoothing_fwhm : int
(Optional) The size of the Gaussian smoothing kernel to apply.
"""
# niftimask and clean data
masker = maskers.NiftiMasker(
mask_img=mask,
t_r=1.49,
standardize=True,
detrend=False,
high_pass=0.00714,
smoothing_fwhm=smoothing_fwhm,
memory=".nilearn-cache",
memory_level=1,
verbose=2,
)
compcor = image.high_variance_confounds(
scan, mask_img=mask, n_confounds=5, percentile=5.0
)
confounds = np.hstack((confounds, compcor))
cleaned = masker.fit_transform(scan, confounds=confounds)
return masker.inverse_transform(cleaned)
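
# A hedged usage sketch (hypothetical paths, not part of this commit):
# clean one in-memory run against its motion regressors, smoothing at
# 5 mm FWHM.
# >>> scan = nib.load("sub-01_run-01_bold.nii.gz")
# >>> confounds = np.loadtxt("sub-01_run-01_motion.txt")
# >>> cleaned = _nifti_mask_movie(scan, "brain_mask.nii.gz", confounds,
# ...                             smoothing_fwhm=5)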


def _resample_movie(scan, mask):
"""
Parameters
----------
scan: niimg_like
An in-memory niimg
mask: str
The (brain) mask within which to process data.
"""
if mask is not None:
aff_orig = nib.load(mask).affine[:, -1]
else:
aff_orig = nib.load(scan).affine[:, -1]
target_affine = np.column_stack([np.eye(4, 3) * 3, aff_orig])

# remove NaNs and resample to 3mm isotropic
nonan_scan = image.clean_img(
scan, detrend=False, standardize=False, ensure_finite=True
)
resampled_scan = image.resample_img(
img=nonan_scan, target_affine=target_affine, target_shape=(65, 77, 65)
)
resampled_mask = image.resample_img(
img=mask, target_affine=target_affine, interpolation="nearest"
)

return resampled_scan, resampled_mask
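
# An illustration of the target affine built above, assuming an MNI-like
# origin in the last column (the translation values here are hypothetical):
# np.eye(4, 3) * 3 places 3 mm voxel sizes on the diagonal, and column_stack
# appends the original translation column.
# >>> np.column_stack([np.eye(4, 3) * 3, np.array([-96.0, -132.0, -78.0, 1.0])])
# array([[   3.,    0.,    0.,  -96.],
#        [   0.,    3.,    0., -132.],
#        [   0.,    0.,    3.,  -78.],
#        [   0.,    0.,    0.,    1.]])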


def subset_and_process_bbt(bold_files, bold_txts, subject, n_segments=None, fwhm=None):
"""
Note that the bbt is long,with 21 separate acquisitions.
This function is therefore also designed to subset the movie
to a set number of segments; for example, to match the number
of frames across tasks.
Although this behavior is off by default (and controllable
with the n_segments argument), note that if you choose to
process the whole movie you can expect a very high memory
usage.
Parameters
----------
bold_files : list
A list of the BOLD file names to subset and process.
bold_txts : list
A list of motion regressors accompanying each BOLD file.
subject : str
Subject identifier string.
n_segments : int
The number of segments to subset from the movie.
Will error if the number of segments requested is
more than are available in the movie.
fwhm : int
The size of the Gaussian smoothing kernel to apply.
Returns
-------
postproc_fname : str
Filename for the concatenated postprocessed file,
optionally smoothed to supplied FWHM.
"""
if n_segments is not None and n_segments > 21:
    warnings.warn(
        "Too many segments requested! Only 21 available. "
        "To return all segments, pass None."
    )
    return None
movie_segments = bold_files[:n_segments]
regressors = bold_txts[:n_segments]

# use the brain mask directly from templateflow,
# so all subjects have the same number of voxels.
tpl_mask = "./tpl-MNI152NLin2009cAsym_res-01_desc-brain_mask.nii.gz"
postproc_fname = Path(
f"{subject}", f"{subject}_task-GoodBadEvil_desc-fwhm{fwhm}_bold.nii.gz"
)

if postproc_fname.exists():
print(f"File {postproc_fname} already exists; skipping")
return postproc_fname

postproc_segments = []
for r, m in zip(regressors, movie_segments):
res_scan, res_mask = _resample_movie(scan=m, mask=tpl_mask)
confounds = np.loadtxt(r)
postproc = _nifti_mask_movie(
scan=res_scan, mask=res_mask, confounds=confounds, smoothing_fwhm=fwhm
)
postproc_segments.append(postproc)

movie = image.concat_imgs(postproc_segments)
nib.save(movie, postproc_fname)
return postproc_fname


if __name__ == "__main__":
# Assumes this script is co-located with data
subjects = sorted(glob.glob("sub-*"))
for s in subjects:
files = Path(s).rglob(f"wrdc{s}*GoodBadEvil*.nii.gz")
regressors = Path(s).rglob(f"rp_dc{s}*GoodBadEvil*bold.txt")
segments = _get_segment([str(f) for f in files])
matched_reg = _get_segment([str(r) for r in regressors])
subset_and_process_bbt(segments, matched_reg, s, n_segments=8, fwhm=5)
183 changes: 183 additions & 0 deletions bayes_ca/data/utils.py
@@ -0,0 +1,183 @@
import os
import h5py
import requests
import numpy as np
import nibabel as nib
from pathlib import Path
from nilearn import image
from nilearn.maskers import NiftiMasker
from sklearn.linear_model import LinearRegression

_package_directory = os.path.dirname(os.path.abspath(__file__))
ANG_ROI_FILE_PATH = os.path.join(_package_directory, "AG_mask.nii.gz")


def _fetch_sherlock_data(data_dir=None):
"""
Fetch sherlock dataset used in BrainIAK tutorials.
By default, store in current working directory, unless
data_dir is passed.
Parameters
----------
data_dir : str
Path on disk to store sherlock dataset.
"""
if data_dir is not None:
os.chdir(data_dir)
url = "https://ndownloader.figshare.com/files/9017983"
resp = requests.get(url, allow_redirects=True)
open("sherlock.h5", "wb").write(resp.content)
with h5py.File("sherlock.h5", "r") as f:
bold = f["BOLD"][()]
return bold
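
# A hedged usage sketch: the download is re-run on every call, so a simple
# existence check (hypothetical guard, not in this commit) avoids repeated
# transfers when the file is already in the working directory.
# >>> if not Path("sherlock.h5").exists():
# ...     bold = _fetch_sherlock_data()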


def _resample_ang_roi(scan, mask):
"""
Parameters
----------
scan: niimg_like
Reference Niimg for resampling.
mask: str
The (brain) mask within which to process data.
"""
aff_orig = nib.load(scan).affine[:, -1]
target_affine = np.column_stack([np.eye(4, 3) * 3, aff_orig])

resampled_mask = image.resample_img(
img=mask, target_affine=target_affine, interpolation="nearest"
)

return resampled_mask


def naturalistic_data(data_dir, gsr=False):
"""
Fetch sherlock dataset used in OHBM Naturalistic Data
tutorials. By default, store in current working directory,
unless data_dir is passed.
Parameters
----------
data_dir : str
Path on disk to store sherlock dataset.
gsr : bool
Whether or not to apply global-signal regression to the
pre-parcellated data. Default False.
Returns
-------
bold : np.arr
Array of shape (n_regions, n_TRs, n_subjects)
"""
bold = np.load(Path(data_dir, "Sherlock_AG_movie.npy")).T

if gsr:
global_signal = np.mean(bold, axis=0)
pred_bold_gs = []

for s in range(bold.shape[-1]):
ols = LinearRegression(fit_intercept=False).fit(
global_signal.T[s].reshape(-1, 1), bold[..., s].T
)
pred_bold_gs.append(ols.predict(global_signal.T[s].reshape(-1, 1)).T)

gsr_bold = bold - np.stack(pred_bold_gs, axis=2)
return gsr_bold

return bold
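
# A hedged refactoring sketch: the identical GSR block appears in
# naturalistic_data above and in bbt_data and sherlock_data below, and could
# be factored into a helper like this hypothetical one (not part of this
# commit; the name is illustrative).
def _regress_global_signal(bold):
    """Regress the mean-over-regions signal out of each subject's data.

    Parameters
    ----------
    bold : np.ndarray
        Array of shape (n_regions, n_TRs, n_subjects).
    """
    global_signal = np.mean(bold, axis=0)  # (n_TRs, n_subjects)
    pred_bold_gs = []
    for s in range(bold.shape[-1]):
        gs = global_signal[:, s].reshape(-1, 1)  # (n_TRs, 1) design matrix
        ols = LinearRegression(fit_intercept=False).fit(gs, bold[..., s].T)
        pred_bold_gs.append(ols.predict(gs).T)  # back to (n_regions, n_TRs)
    return bold - np.stack(pred_bold_gs, axis=2)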


def bbt_data(data_dir, gsr=False):
"""
Assuming the preprocessed BBT (Du Bon, de la Brute et du Truand)
data exists on disk, fetch and mask that data with predetermined
regions-of-interest as defined in Baldassano et al. (2017).
Parameters
----------
data_dir : str or pathlib.Path
Location of the BBT pre-processed dataset on disk.
gsr : bool
Whether or not to apply global-signal regression to the
pre-parcellated data. Default False.
Returns
-------
bold : np.arr
Array of shape (n_regions, n_TRs, n_subjs)
"""
scans = []
pattern = f"*task-GoodBadEvil_desc-fwhm5_bold.nii.gz"
path = Path(data_dir)
scans.append(sorted(path.rglob(pattern)))

res_mask = _resample_ang_roi(scans[0][0], ANG_ROI_FILE_PATH)

masker = NiftiMasker(
mask_img=res_mask,
standardize=False,
memory=".nilearn-cache",
memory_level=1,
verbose=2,
).fit()

ang_vx = [masker.transform(s).T for s in sum(scans, [])]
# add small uniform jitter to each voxel time series
bold = np.asarray([(v.T + np.random.random(v.T.shape)) for v in ang_vx]).T

if gsr:
global_signal = np.mean(bold, axis=0)
pred_bold_gs = []

for s in range(bold.shape[-1]):
ols = LinearRegression(fit_intercept=False).fit(
global_signal.T[s].reshape(-1, 1), bold[..., s].T
)
pred_bold_gs.append(ols.predict(global_signal.T[s].reshape(-1, 1)).T)

gsr_bold = bold - np.stack(pred_bold_gs, axis=2)
# jitter the residualized data, as above
gsr_bold = gsr_bold + np.random.random(gsr_bold.shape)
return gsr_bold

return bold


def sherlock_data(gsr=False):
"""
Replicating BrainIAK HMM tutorial on sherlock data
(as distributed by the BrainIAK authors).
For the full tutorial, please see :
https://brainiak.org/tutorials/12-hmm/
Here, we only replicate the event boundary estimation
and compairison with a permuted null model.
Parameters
----------
gsr : bool
Whether or not to apply global-signal regression to the
pre-parcellated data.
Returns
-------
BOLD : np.arr
Array of shape (141, 1976, 17)
"""
bold = _fetch_sherlock_data()

if gsr:
global_signal = np.mean(bold, axis=0)
pred_bold_gs = []

for s in range(bold.shape[-1]):
ols = LinearRegression(fit_intercept=False).fit(
global_signal.T[s].reshape(-1, 1), bold[..., s].T
)
pred_bold_gs.append(ols.predict(global_signal.T[s].reshape(-1, 1)).T)

gsr_bold = bold - np.stack(pred_bold_gs, axis=2)
return gsr_bold

return bold