diff --git a/01_Preprocessing/GM_matrix_create.R b/01_Preprocessing/GM_matrix_create.R new file mode 100644 index 0000000..38a31fa --- /dev/null +++ b/01_Preprocessing/GM_matrix_create.R @@ -0,0 +1,71 @@
+#!/usr/bin/env Rscript
+args = commandArgs(trailingOnly=TRUE)
+# Create masked GM matrix (subj x voxels) for OPNMF analyses #
+
+### Arguments ###
+
+# 1: Species investigated - "chimp" or "human"
+# 2: Name of meta data .csv file #
+# 3: Name and relative path of GM mask Nifti image #
+
+
+wd <- "~/projects/chimp_human_opnmf/"
+setwd(wd)
+
+# load in libraries needed for script #
+# need to install pacman if not already installed
+if (!require("pacman")) install.packages("pacman")
+pacman::p_load(fs, readr, dplyr, neurobase, oro.nifti)
+
+# sample species either human (IXI) or chimp for locating images and naming outputs #
+cohort <- args[1] # "chimp" or "human"
+
+# sample meta data to gather image names #
+meta_name <- args[2] #"Chimp_meta_QC_u50_n189.csv"
+meta_data <- read_csv(file = paste(path_wd(), "data", cohort, meta_name,
+                                   sep = "/"))
+# smoothing kernel of images - chimp 4mm & IXI 6mm #
+if (cohort == "chimp") {
+  s_kernel <- "s4"
+} else if (cohort == "human") {
+  s_kernel <- "s6"
+} else {
+  stop("!! ERROR: species not recognised, should be 'chimp' or 'human' !!")
+}
+# file names of images to create GM data matrix #
+subj_name <- c(paste0(s_kernel, "_mwp1", meta_data$Subject, ".gz"))
+# path to images #
+img_path <- paste(path_wd(), "data", cohort, "mwp1", sep = "/")
+
+# create GM index at TPM 0.3GM #
+mask_name <- args[3] #"masks/Chimp_GM_TPM_03_Cortex_2mm.nii.gz"
+GM_mask <- readnii(paste(path_wd(), mask_name, sep = "/"))
+# GM index mask vector #
+GM_ind <- c(img_data(GM_mask > 0))
+
+# empty lists #
+imgs <- vector(mode = "list", length = nrow(meta_data))
+imgs_GM <- vector(mode = "list", length = nrow(meta_data))
+
+# fill lists with GM data from each subject #
+for (i in 1:length(subj_name)) {
+  # read nifti images #
+  imgs[[i]] <- readnii(paste(img_path, subj_name[i], sep = "/"))
+  # mask GM data #
+  imgs_GM[[i]] <- c(img_data(imgs[[i]])[GM_ind])
+}
+
+# create a GM matrix subj x voxels #
+GM_mat <- rbind(matrix(unlist(imgs_GM),
+                       ncol = length(imgs_GM[[1]]), byrow = TRUE))
+# Name of output data matrix #
+if (cohort == "chimp") {
+  out_name <- paste0("Chimp_cort_n", nrow(meta_data), "_TPM03_mat.rds")
+} else if (cohort == "human") {
+  out_name <- paste0("IXI_cort_n", nrow(meta_data), "_mat.rds")
+} else {
+  stop("!! ERROR: species not recognised, should be 'chimp' or 'human' !!")
+}
+# save data matrix #
+saveRDS(GM_mat, file = paste(path_wd(), "data", cohort, out_name, sep="/"))
+
diff --git a/01_Preprocessing/README.md b/01_Preprocessing/README.md new file mode 100644 index 0000000..0ad3efa --- /dev/null +++ b/01_Preprocessing/README.md @@ -0,0 +1,6 @@
+# Structural Preprocessing
+
+Structural preprocessing of the T1-weighted images for the chimpanzee and human cohorts utilizing [SPM12](https://www.fil.ion.ucl.ac.uk/spm/software/spm12/) and [CAT12](http://www.neuro.uni-jena.de/cat/). The MATLAB scripts are to be used as batch inputs and can then be adapted in the SPM batch editor.
+
+Following preprocessing, move the ```mwp1*``` and smoothed modulated images into the ```$project_dir/data/$cohort/mwp1/``` directory and then run ```run_GM_matrix_create.sh``` to create the data matrix for the OPNMF parcellation.
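For orientation, here is a minimal sketch of calling the matrix-creation script directly with its three arguments (species, meta-data csv, GM mask); the file names are only the examples quoted in the script's comments, so substitute your own:

```bash
# Minimal sketch, assuming the call is made from 01_Preprocessing/ and that the
# example meta-data csv and GM mask (taken from the script's comments) exist
# under the project's data/ and masks/ directories.
Rscript GM_matrix_create.R \
    chimp \
    Chimp_meta_QC_u50_n189.csv \
    masks/Chimp_GM_TPM_03_Cortex_2mm.nii.gz
```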
+ diff --git a/01_Preprocessing/chimp_preproc_batch.m b/01_Preprocessing/chimp_preproc_batch.m new file mode 100644 index 0000000..e796f1d --- /dev/null +++ b/01_Preprocessing/chimp_preproc_batch.m @@ -0,0 +1,162 @@ +clc, clear + +% ------------------------------------------------------------------------ +% Chimpanzee batch options that can be changed in the GUI or here directly +% ------------------------------------------------------------------------ +% open the chimpanzee preprocessing from cat12 +cat12('chimpanzee'); +% set the number of cores for parallel processing +try + numcores = max(cat_get_defaults('extopts.nproc'),1); +catch + numcores = 0; +end +%% CAT preprocessing +% ------------------------------------------------------------------------ +% Dir containing chimp original T1w images % +chimps_dir = ... + 'C:\Users\svickery\Documents\preproc_test'; +% chimp images % +chimp_img = dir(fullfile(chimps_dir,'*.nii')); +% IMG need to be in a cell to be read by batch % +for subj = 1:numel(chimp_img) + matlabbatch{1}.spm.tools.cat.estwrite.data{subj,1} = ... + fullfile(chimps_dir, chimp_img(subj).name); +end +% Check name and location of template dir in SPM dir % +% This should be the dir with cat v1725 +cattempdir = fullfile(spm('dir'),'toolbox','cat12','animal_templates'); +% CAT preprocessing expert options +% SPM parameter +matlabbatch{1}.spm.tools.cat.estwrite.data_wmh = {''}; +matlabbatch{1}.spm.tools.cat.estwrite.nproc = numcores; +matlabbatch{1}.spm.tools.cat.estwrite.opts.tpm = {fullfile(cattempdir,'chimpanzee_TPM.nii')}; % Juna chimp TPM +matlabbatch{1}.spm.tools.cat.estwrite.opts.affreg = 'none'; +matlabbatch{1}.spm.tools.cat.estwrite.opts.ngaus = [1 1 2 3 4 2]; +matlabbatch{1}.spm.tools.cat.estwrite.opts.warpreg = [0 0.001 0.5 0.05 0.2]; +matlabbatch{1}.spm.tools.cat.estwrite.opts.bias.spm.biasfwhm = 30; % small values are important to remove the bias but 30 mm is more or less the limit +matlabbatch{1}.spm.tools.cat.estwrite.opts.bias.spm.biasreg = 0.001; % +matlabbatch{1}.spm.tools.cat.estwrite.opts.acc.spm.samp = 1.5; % ############# higher resolutions helps but takes much more time (e.g., 1.5 about 4 hours), so 1.0 to 1.5 mm seems to be adequate +matlabbatch{1}.spm.tools.cat.estwrite.opts.acc.spm.tol = 1e-06; % smaller values are better and important to remove the bias in some image +matlabbatch{1}.spm.tools.cat.estwrite.opts.redspmres = 0; + +% segmentation options +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.APP = 1070; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.NCstr = -Inf; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.spm_kamap = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.LASstr = 1.0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.gcutstr = 2; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.cleanupstr = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.BVCstr = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.WMHC = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.SLC = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.mrf = 1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.restypes.best = [0.5 0.3]; + +% registration options +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.T1 = {fullfile(cattempdir,'chimpanzee_T1.nii')}; % Juna chimp T1 +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.brainmask = {fullfile(cattempdir,'chimpanzee_brainmask.nii')}; % Juna chimp brainmask 
+matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.cat12atlas = {fullfile(cattempdir,'chimpanzee_cat.nii')}; % Juna chimp cat atlas +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.darteltpm = {fullfile(cattempdir,'chimpanzee_Template_1.nii')}; % there is no Juna chimp Dartel template as shooting is much better +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.shootingtpm = {fullfile(cattempdir,'chimpanzee_Template_0_GS.nii')}; % Juna chimp shooting template +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.regstr = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.vox = 2; % Downsample to 2mm for faster OPNMF processing + +% surface options +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.pbtres = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.pbtmethod = 'pbt2x'; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.pbtlas = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.collcorr = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.reduce_mesh = 1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.vdist = 1.33333333333333; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.scale_cortex = 0.7; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.add_parahipp = 0.1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.close_parahipp = 0; + +% admin options +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.experimental = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.new_release = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.lazy = 0; % ############## avoid reprocessing +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.ignoreErrors = 1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.verb = 2; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.print = 2; + +% output options +matlabbatch{1}.spm.tools.cat.estwrite.output.surface = 0; % surface reconstruction - not yet optimised for non-human primates +matlabbatch{1}.spm.tools.cat.estwrite.output.surf_measures = 3; + +% volume atlas maps +matlabbatch{1}.spm.tools.cat.estwrite.output.ROImenu.atlases.chimpanzee_atlas_davi = 1; +matlabbatch{1}.spm.tools.cat.estwrite.output.ROImenu.atlases.ownatlas = {''}; % you can add own atlas maps but they have to be in the same orientation as the other template files especially the final GS template + +% surface atlas maps +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Desikan = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Destrieux = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.HCP = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_100P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_200P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_400P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_600P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.ownatlas = {''}; + +% volume output +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.mod = 1; % needed for VBM +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WM.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WM.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WM.mod = 0; 
+matlabbatch{1}.spm.tools.cat.estwrite.output.WM.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.bias.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.bias.warped = 1; +matlabbatch{1}.spm.tools.cat.estwrite.output.bias.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.jacobianwarped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.warps = [0 0]; + +% special maps +matlabbatch{1}.spm.tools.cat.estwrite.output.ct.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.ct.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.ct.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.pp.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.pp.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.pp.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.atlas.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.atlas.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.atlas.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.label.native = 1; +matlabbatch{1}.spm.tools.cat.estwrite.output.label.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.label.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.las.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.las.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.las.dartel = 0; + +% ------------------------------------------------------------------------ + +%% smoothing +% ------------------------------------------------------------------------ +matlabbatch{2}.spm.spatial.smooth.fwhm = repmat(4,1,3); % smoothing filter size 4x4x4 +matlabbatch{2}.spm.spatial.smooth.dtype = 0; +matlabbatch{2}.spm.spatial.smooth.im = 0; +matlabbatch{2}.spm.spatial.smooth.prefix = 's4_'; +% ------------------------------------------------------------------------ + +%% +% Use to directly run script keep commented out if want to adapt using GUI +%spm_jobman('run',matlabbatch); diff --git a/01_Preprocessing/human_preproc_batch.m b/01_Preprocessing/human_preproc_batch.m new file mode 100644 index 0000000..6990a99 --- /dev/null +++ b/01_Preprocessing/human_preproc_batch.m @@ -0,0 +1,151 @@ +clc, clear +% ------------------------------------------------------------------------ +% Human batch options that can be changed in the GUI or here directly +% ------------------------------------------------------------------------ +% set the number of cores for parallel processing +try + numcores = max(cat_get_defaults('extopts.nproc'),1); +catch + numcores = 0; +end +%% CAT preprocessing +% 
------------------------------------------------------------------------ +% Dir containing eNKI or IXI (human) original T1w images % +human_dir = ... + '~/project/chimp_human_opnmf/data/IXI/mwp1'; % /IXI/ or /eNKI/ +% human (eNKI or IXI) images % +human_img = dir(fullfile(human_dir,'*.nii')); +% IMG need to be in a cell to be read by batch % +for subj = 1:numel(human_img) + matlabbatch{1}.spm.tools.cat.estwrite.data{subj,1} = ... + fullfile(human_dir, human_img(subj).name); +end +% Check name and location of template dir in SPM dir % +% This should be the dir with cat v1725 +cattempdir = fullfile(spm('dir'),'toolbox','cat12','animal_templates'); +% CAT preprocessing expert options +% SPM parameter +matlabbatch{1}.spm.tools.cat.estwrite.data_wmh = {''}; +matlabbatch{1}.spm.tools.cat.estwrite.nproc = numcores; +matlabbatch{1}.spm.tools.cat.estwrite.opts.tpm = {fullfile(spm('dir'),'tpm','TPM.nii')}; % Human TPM +matlabbatch{1}.spm.tools.cat.estwrite.opts.affreg = 'mni'; +matlabbatch{1}.spm.tools.cat.estwrite.opts.biasstr = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.opts.accstr = 0.75; + +% segmentation options +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.restypes.optimal = [1 0.3]; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.setCOM = 1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.APP = 1070; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.affmod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.NCstr = -Inf; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.spm_kamap = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.LASstr = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.LASmyostr = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.gcutstr = 2; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.cleanupstr = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.BVCstr = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.WMHC = 2; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.SLC = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.segmentation.mrf = 1; + +% registration options +% used 1mm shooting template provided by Christian Gaser 'Template_0_GS1mm.nii' % +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.regmethod.shooting.shootingtpm = {fullfile(spm('dir'),'toolbox','cat12','templates_MNI152NLin2009cAsym','Template_0_GS.nii')}; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.regmethod.shooting.regstr = 1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.vox = 1; % Downsample to 3mm for faster OPNMF processing +matlabbatch{1}.spm.tools.cat.estwrite.extopts.registration.bb = 45; + +% surface options +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.pbtres = 0.5; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.pbtmethod = 'pbt2x'; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.SRP = 22; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.reduce_mesh = 1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.vdist = 2; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.scale_cortex = 0.7; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.add_parahipp = 0.1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.surface.close_parahipp = 1; + +% admin options +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.experimental = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.new_release = 0; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.lazy = 0; 
+matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.ignoreErrors = 1; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.verb = 2; +matlabbatch{1}.spm.tools.cat.estwrite.extopts.admin.print = 2; + +% output options +matlabbatch{1}.spm.tools.cat.estwrite.output.BIDS.BIDSno = 1; +matlabbatch{1}.spm.tools.cat.estwrite.output.surface = 0; % surface reconstruction - not yet optimised for non-human primates +matlabbatch{1}.spm.tools.cat.estwrite.output.surf_measures = 3; + +% surface atlas maps +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Desikan = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Destrieux = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.HCP = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_100P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_200P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_400P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.Schaefer2018_600P_17N = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.sROImenu.satlases.ownatlas = {''}; + +% volume output +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.mod = 1; % needed for VBM +matlabbatch{1}.spm.tools.cat.estwrite.output.GM.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WM.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WM.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WM.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WM.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.CSF.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.bias.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.bias.warped = 1; +matlabbatch{1}.spm.tools.cat.estwrite.output.bias.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.jacobianwarped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.warps = [0 0]; + +% special maps +matlabbatch{1}.spm.tools.cat.estwrite.output.ct.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.ct.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.ct.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.pp.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.pp.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.pp.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.WMH.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.SL.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.warped = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.mod = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.TPMC.dartel = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.atlas.native = 0; +matlabbatch{1}.spm.tools.cat.estwrite.output.atlas.warped = 0; 
+matlabbatch{1}.spm.tools.cat.estwrite.output.atlas.dartel = 0;
+matlabbatch{1}.spm.tools.cat.estwrite.output.label.native = 1;
+matlabbatch{1}.spm.tools.cat.estwrite.output.label.warped = 0;
+matlabbatch{1}.spm.tools.cat.estwrite.output.label.dartel = 0;
+matlabbatch{1}.spm.tools.cat.estwrite.output.las.native = 0;
+matlabbatch{1}.spm.tools.cat.estwrite.output.las.warped = 0;
+matlabbatch{1}.spm.tools.cat.estwrite.output.las.dartel = 0;
+
+% ------------------------------------------------------------------------
+
+%% smoothing
+% ------------------------------------------------------------------------
+matlabbatch{2}.spm.spatial.smooth.fwhm = repmat(6,1,3); % smoothing filter size 6x6x6 mm
+matlabbatch{2}.spm.spatial.smooth.dtype = 0;
+matlabbatch{2}.spm.spatial.smooth.im = 0;
+matlabbatch{2}.spm.spatial.smooth.prefix = 's6_';
+% ------------------------------------------------------------------------
+
+%%
+% Uncomment the line below to run the batch directly; keep it commented out if you want to adapt the batch in the GUI
+%spm_jobman('run',matlabbatch);
diff --git a/01_Preprocessing/run_GM_matrix_create.sh b/01_Preprocessing/run_GM_matrix_create.sh new file mode 100644 index 0000000..ee5d162 --- /dev/null +++ b/01_Preprocessing/run_GM_matrix_create.sh @@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Run script for GM matrix creation #
+# which creates the data matrix for input into OPNMF analyses #
+
+# The R script requires 3 input arguments #
+# 1 - Species investigated - "chimp" or "human"
+# 2 - Name of the cohort phenotype data file e.g. chimp_meta_QC_n189.csv
+# 3 - Name and relative path from project dir to GM mask e.g. "masks/chimp_GM_TPM_03_Cortex_2mm.nii.gz"
+
+# Name of R script #
+r_script=GM_matrix_create.R
+
+# Cohort species chimp or human #
+species="chimp"
+
+# Name of meta data csv file to gather sample for data matrix #
+cohort="chimp_meta_QC_n189.csv"
+
+# path to GM mask #
+GM_mask="masks/chimp_GM_TPM_03_Cortex_2mm.nii.gz"
+
+# run R script
+Rscript $r_script $species $cohort $GM_mask
+echo "Creating $species data matrix"
+
+
diff --git a/02_Cross-species_Registration/README.md b/02_Cross-species_Registration/README.md new file mode 100644 index 0000000..a21f603 --- /dev/null +++ b/02_Cross-species_Registration/README.md @@ -0,0 +1,6 @@
+# Cross-Species Registration
+
+The modulated Jacobians used to create the cross-species expansion maps, as well as the chimpanzee (Juna) to human (MNI) deformation field map for deforming the OPNMF solutions to MNI space, can be found [here](https://zenodo.org/record/7116203#.YzLvCfexWV4).
+
+The ```exp_create.sh``` script uses FSL, in particular ```fslmaths```, to mask the Jacobians and create the expansion maps.
+
diff --git a/02_Cross-species_Registration/exp_create.sh b/02_Cross-species_Registration/exp_create.sh new file mode 100644 index 0000000..6a11dd0 --- /dev/null +++ b/02_Cross-species_Registration/exp_create.sh @@ -0,0 +1,156 @@
+#!/bin/bash
+
+##### FSL v5.0 #####
+## Create expansion maps from the cross-species registrations using fslmaths ##
+## This script should be run in the code dir, which is ../data/deformations/ away ##
+## from the deformation and T1 images.
This can be changed by adapting $data_dir ##
+
+## 5 INPUTS ##
+# INPUT 1 - Name of T1 image for brain mask (string)
+# INPUT 2 - Name of modulated jacobian (string)
+# INPUT 3 - Name of template (MNI, Haiko, NMT, D99, INIA19, MeanMacaque)
+# INPUT 4 - Minimum threshold value for thresholding modulated jacobian for extreme values
+# INPUT 5 - Max threshold value for thresholding modulated jacobian for extreme values
+
+## OUTPUTS ##
+# Directory for output files as name of template in $data_dir
+# 1 - brain mask
+# 2 - brain masked jacobian
+# 3 - brain masked expansion map
+# 4 - brain masked jacobian with threshold
+# 5 - voxels above threshold
+# 6 - voxels below threshold
+
+# prints the relative path to the script #
+echo -e "script dir:\n$0"
+
+# using relative paths creates the complete path to the dir and prints it #
+# from code dir with script go one up and then to Deformations where data is #
+data_dir="$(dirname $(dirname $(realpath $0) ) )/data/Deformations"
+echo -e "Deformations Dir:\n$data_dir"
+
+# name of registered T1 template image used to mask the deformation map #
+# INPUT 1 #
+T1_name=$1
+echo -e "T1 Name: \n$data_dir/$T1_name"
+
+# name of modulated jacobian file
+# INPUT 2 #
+def_name=$2
+echo -e "Deformation Name: \n$data_dir/$def_name"
+
+# User input of template name: if MNI then chimp-human exp factor #
+# if not then go through the different baboon and macaque templates that can be used #
+# if none of these are given print error message #
+if [[ "$3" == "MNI" ]]; then
+    echo "#### Human (MNI) Pipeline ####"
+    # human brain ~ 3.5x chimp so 1/3.5 = 0.286
+    out_name=$3
+    exp_factor=0.286
+    [ ! -d $data_dir/$out_name ] && mkdir $data_dir/$out_name
+
+elif [[ "$3" == "Haiko" ]]; then
+    echo "#### Baboon Pipeline ####"
+    # chimp brain ~2.5x baboon so 1/2.5 = 0.4 manipulation
+    out_name=$3
+    exp_factor=0.4
+    [ ! -d $data_dir/$out_name ] && mkdir $data_dir/$out_name
+
+elif [[ "$3" == "NMT" ]]; then
+    echo "#### Macaque Pipeline ####"
+    # chimp brain ~4.5x macaque brain so 1/4.5 = 0.22 manipulation
+    out_name=$3
+    exp_factor=0.22
+    [ ! -d $data_dir/$out_name ] && mkdir $data_dir/$out_name
+
+elif [[ "$3" == "D99" ]]; then
+    echo "#### Macaque Pipeline ####"
+    # 0.22 manipulation with macaque #
+    out_name=$3
+    exp_factor=0.22
+    [ ! -d $data_dir/$out_name ] && mkdir $data_dir/$out_name
+
+elif [[ "$3" == "INIA19" ]]; then
+    echo "#### Macaque Pipeline ####"
+    # 0.22 manipulation for macaque #
+    out_name=$3
+    exp_factor=0.22
+    [ ! -d $data_dir/$out_name ] && mkdir $data_dir/$out_name
+
+elif [[ "$3" == "MeanMacaque" ]]; then
+    echo "#### Macaque Pipeline ####"
+    # 0.22 manipulation for macaque #
+    out_name=$3
+    exp_factor=0.22
+    [ ! -d $data_dir/$out_name ] && mkdir $data_dir/$out_name
+
+else
+    echo -e "#### DO NOT RECOGNISE TEMPLATE NAME ####\n#### ERROR ####"
+    exp_factor=0
+fi
+
+# a non-thresholded map will be created along with an image with the provided threshold #
+# INPUT 4 lower limit of threshold for jacobian #
+min_thr=$4
+
+# INPUT 5 upper limit of threshold for jacobian #
+max_thr=$5
+
+# Create a mask of the brain #
+# brain mask output file name #
+mask_out_name=$data_dir/${out_name}/${out_name}_brain_mask.nii.gz
+echo -e "Brain mask: \n$mask_out_name"
+
+# check if mask doesn't exist #
+if [ !
-e "$mask_out_name" ]; then
+    fslmaths ${data_dir}/$T1_name -ero -bin $mask_out_name
+fi
+
+# mask the deformation map using the brain mask #
+# create masked jacobian deformation map #
+jac_out_name=$data_dir/${out_name}/${out_name}_jac_brain.nii.gz
+echo -e "Jacobian Brain: \n$jac_out_name"
+
+# check if masked jacobian doesn't exist #
+if [ ! -e "$jac_out_name" ]; then
+    fslmaths ${data_dir}/$def_name -mas $mask_out_name $jac_out_name
+fi
+
+# create the expansion map using the expansion factor #
+# name of expansion map without any threshold #
+exp_no_thr=$data_dir/${out_name}/${out_name}2Juna_expansion_noThr.nii.gz
+echo -e "Expansion No Threshold: \n$exp_no_thr"
+
+if [ ! -e "$exp_no_thr" ]; then
+    fslmaths $mask_out_name -div $exp_factor -div $jac_out_name $exp_no_thr
+fi
+
+# create additional expansion map using upper and lower thresholds for extreme values #
+exp_thr=$data_dir/${out_name}/${out_name}2Juna_expansion_${min_thr}_${max_thr}.nii.gz
+echo -e "Expansion Thresholded: \n$exp_thr"
+
+# If the expansion map has not been created using the same threshold then create it #
+if [ ! -e "$exp_thr" ]
+    then
+
+    # create mask for upper threshold to be added back to final image #
+    max_thr_out=${data_dir}/${out_name}/${out_name}_${max_thr}
+    fslmaths ${data_dir}/$def_name -thr $max_thr -mas $mask_out_name \
+    -bin -mul $max_thr $max_thr_out
+
+    # create mask for lower threshold to be added back to final image #
+    min_thr_out=${data_dir}/${out_name}/${out_name}_${min_thr}
+    fslmaths ${data_dir}/$def_name -uthr $min_thr -mas $mask_out_name \
+    -bin -mul $min_thr $min_thr_out
+
+    # threshold image with upper and lower limits and set out-of-range voxels to the threshold values #
+    jac_min_max=${data_dir}/${out_name}/${out_name}_jac_thr_${min_thr}_${max_thr}
+    fslmaths ${data_dir}/$def_name -thr $min_thr -uthr $max_thr -mas $mask_out_name \
+    -add $max_thr_out -add $min_thr_out $jac_min_max
+
+    # Now create expansion map with the thresholded jacobian #
+    fslmaths $mask_out_name -div $exp_factor -div $jac_min_max $exp_thr
+
+fi
+
+
diff --git a/03_OPNMF/01_run_OPNMF_fact.sh b/03_OPNMF/01_run_OPNMF_fact.sh new file mode 100644 index 0000000..57849b2 --- /dev/null +++ b/03_OPNMF/01_run_OPNMF_fact.sh @@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# OPNMF run script (OPNMF_fact.R)
+# OPNMF script requires 2 arguments
+# 1 - relative path from working dir to input matrix and name
+# 2 - Number of OPNMF factors to be created
+
+# Name of R script #
+r_script=OPNMF_fact.R
+
+# input data matrix #
+# The relative path from the working/project dir also needs to be provided #
+inputMat="data/chimp/cort_n189_02GM_mat_rds.data"
+
+# OPNMF rank range
+# rank=17
+
+# loop over rank range and save outputs in wd/outputs/ #
+for rank in $(seq 2 1 40); do
+
+    # run OPNMF R script
+    Rscript $r_script $inputMat $rank
+    echo "Using $r_script on $inputMat with $rank factors"
+
+done
diff --git a/03_OPNMF/02_run_OPNMF_brain_map.sh b/03_OPNMF/02_run_OPNMF_brain_map.sh new file mode 100755 index 0000000..eab6e09 --- /dev/null +++ b/03_OPNMF/02_run_OPNMF_brain_map.sh @@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# run script for OPNMF solution brain projection
+# R script requires 2 arguments #
+## 1: Cohort to be investigated - "chimp" or "human"
+
+## 2: relative path to GM mask for cohort e.g. "/masks/Chimp_GM_TPM_03_Cortex_2mm.nii.gz"
+## Here wd = ~/project/chimp_human_opnmf/ which is hard coded into the R script
+
+# Cohort #
+cohort="chimp"
+echo "$cohort brain Projection"
+
+# relative path from wd to GM mask #
+mask_img="/masks/Chimp_GM_TPM_03_Cortex_2mm.nii.gz"
+
+
+# Run Rscript #
+Rscript OPNMF_brain_map.R $cohort $mask_img
+
+
diff --git a/03_OPNMF/03_Chimp_opnmf_def_batch.m b/03_OPNMF/03_Chimp_opnmf_def_batch.m new file mode 100644 index 0000000..d866ba8 --- /dev/null +++ b/03_OPNMF/03_Chimp_opnmf_def_batch.m @@ -0,0 +1,26 @@
+clc, clear
+% SPM12 CAT12 batch for utilising chimpanzee to human deformation map to
+% deform chimp OPNMF solutions to human MNI space to conduct ARI similarity
+% analyses
+
+% working project directory %
+wd = '/home/svickery/projects/chimp_human_opnmf/';
+
+% chimp - human deformation field map location %
+def_map = fullfile(wd, 'data', 'expansion', 'y_JunaChimp_brain.nii,1');
+
+% all chimp OPNMF solutions %
+% !!! OPNMF solutions need to be .nii to be used in spm batch !!! %
+opnmf_imgs = dir(fullfile(wd, 'data', 'chimp', 'opnmf_parc', '*.nii'));
+
+% batch options %
+matlabbatch{1}.spm.tools.cat.tools.defs2.field = {def_map};
+for subj = 1:numel(opnmf_imgs)
+    matlabbatch{1}.spm.tools.cat.tools.defs2.images{subj,1} = ...
+        {fullfile(opnmf_imgs(subj).folder, opnmf_imgs(subj).name)}';
+end
+matlabbatch{1}.spm.tools.cat.tools.defs2.bb = [NaN NaN NaN
+                                               NaN NaN NaN];
+matlabbatch{1}.spm.tools.cat.tools.defs2.vox = [3 3 3];
+matlabbatch{1}.spm.tools.cat.tools.defs2.interp = 0;
+matlabbatch{1}.spm.tools.cat.tools.defs2.modulate = 0;
diff --git a/03_OPNMF/04_ARI_sim.R b/03_OPNMF/04_ARI_sim.R new file mode 100755 index 0000000..ac8f1e3 --- /dev/null +++ b/03_OPNMF/04_ARI_sim.R @@ -0,0 +1,56 @@
+# Compares parcel similarity between chimp and human OPNMF solutions #
+# chimpanzee OPNMF solutions have been registered to human MNI space #
+# using 03_Chimp_opnmf_def_batch.m SPM batch script #
+
+# working project directory #
+wd <- "~/projects/chimp_human_opnmf/"
+setwd(wd)
+
+# load in libraries needed for script #
+# need to install pacman if not already installed
+if (!require("pacman")) install.packages("pacman")
+pacman::p_load(fs, neurobase, oro.nifti, reshape2)
+
+### A utility script needs to be in wd/code which contains helper functions
+# needed for the ari_sim function #
+source(file = paste(path_wd(), "code", "OPNMF_util.R", sep = "/"))
+### OUTPUTS:
+
+parc1_path <- paste(path_wd(), "data", "chimp", "opnmf_parc", sep = "/")
+parc2_path <- paste(path_wd(), "data", "IXI","opnmf_parc", sep = "/")
+
+# GM mask: use the chimp parcellation in MNI space #
+GM_mask <- readnii(paste(parc1_path,"wchimp_cortex_n189_TPM03_rank_2.nii.gz",
+                         sep = "/"))
+# index GM voxels to be replaced with opnmf parcellation #
+GM_ind <- c(img_data(GM_mask > 0))
+# create vector of GM mask #
+#GM_02_vol <- c(img_data(GM_mask))
+
+# path to images #
+
+ranks <- 2:40
+ari_sim <- rep(NA, length(ranks))
+
+for (i in 1:length(ranks)) {
+  # 1st parcellation GM masked #
+  parc1 <- readnii(
+    paste(parc1_path,
+          paste("wchimp_cort_n189_TPM03_rank_", ranks[i], "_median.nii", sep = ""),
+          sep = "/")
+  )
+  # 2nd parc GM masked #
+  parc2 <- readnii(
+    paste(parc2_path,
+          paste("IXI_cortex_n480_rank_", ranks[i], "_median.nii.gz", sep = ""),
+          sep = "/")
+  )
+  # calculate ARI for parcellations #
+  ari_sim[i] <- parc_sim_ARI(parc1, parc2, GM_mask)
+}
+
+ari_df <- data.frame("ranks" = ranks, "ARI_IXI_Chimp" = ari_sim)
+# melt for easier plotting #
+ari_tib <- melt(ari_df, id.vars = c("ranks"))
+saveRDS(ari_tib, file = paste(path_wd(), "outputs", + "Chimp_TPM03_2_IXI_ARI.rds", sep = "/")) diff --git a/03_OPNMF/05_run_OPNMF_boot.sh b/03_OPNMF/05_run_OPNMF_boot.sh new file mode 100755 index 0000000..0f77830 --- /dev/null +++ b/03_OPNMF/05_run_OPNMF_boot.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# +# OPNMF run script (OPNMF_boot.R) +# OPNMF script requires 4 arguments +# 1 - relative path from working dir to input matrix and name +# 2 - Number of OPNMF factors to be created +# 3 - Number of bootstraps +# 4 - Seed for reproducibility + +# Name of R script for OPNMF bootstrapping # +r_script=OPNMF_boot.R + +# input data matrix # +inputMat=chimp_cort_n189_02GM_mat.rds + +# OPNMF rank +#rank=17 + +# number of bootstraps # +numboot=30 + +# set seed for random generation of boostraps and permutations +num_seed=47 + +for rank in $(seq 2 1 10); do + + Rscript $r_script $inputMat $rank $numboot $num_seed + echo OPNMF bootstrapping on $inputMat: Ranks - $rank, bootstraps - $numboot, seed: $num_seed + +done diff --git a/03_OPNMF/06_MSE_ARI_plot.R b/03_OPNMF/06_MSE_ARI_plot.R new file mode 100755 index 0000000..a4a96eb --- /dev/null +++ b/03_OPNMF/06_MSE_ARI_plot.R @@ -0,0 +1,173 @@ +#### create a single plot for the 3 samples MSE change with 30 bootstraps #### +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) + +# load in libraries needed for script # +# need to install pacman if not already installed +if (!require("pacman")) install.packages("pacman") +pacman::p_load(dplyr, ggplot2, fs, neurobase, oro.nifti, reshape2, purrr, + tidyr, tibble, readr) + +# paths to the boot diff daat from each sample # +chimp_path <- paste(path_wd(), "data", "chimpanzee", "bootstraps", sep = "/") +ixi_path <- paste(path_wd(), "data", "IXI", "bootstraps", sep = "/") + +# list bootstrap output files # +# multiple files for the same rank due to high compute time # +chimp_fils <- list.files(path = chimp_path) +ixi_fils <- list.files(path = ixi_path) + +# empty list for bootstrap mse data to calculate change # +chimp_mse <- vector(mode = "list", length = length(chimp_fils)) +# empty vector for bootstrap runs rank number # +chimp_ranks <- vector(length = length(chimp_fils)) +for (i in 1:length(chimp_fils)) { + + # read bootstrapped output files # + c_dat <- read_rds(file = paste(chimp_path, chimp_fils[i], sep = "/")) + + # add rank for plotting and organising different bootstrap runs # + chimp_ranks[i] <- c_dat$rank + + # extract mse of bootstrapping run # + chimp_mse[[i]] <- c_dat$mse[[1]][["orig"]] + +} + +# empty list for bootstrap mse data to calculate change # +ixi_mse <- vector(mode = "list", length = length(ixi_fils)) +# empty vector for bootstrap runs rank number # +ixi_ranks <- vector(length = length(ixi_fils)) +for (x in 1:length(ixi_fils)) { + + # read bootstrapped output files # + h_dat <- read_rds(file = paste(ixi_path, ixi_fils[x], sep = "/")) + + # add rank for plotting and organizing different bootstrap runs # + ixi_ranks[x] <- h_dat$rank + + # extract mse of bootstrapping run # + ixi_mse[[x]] <- h_dat$mse[[1]][["orig"]] + +} + +## Create nested data.frame containing chimp and human data ## + +# chimp boot nested tibble # +chimp_boot_tib <- tibble(ranks = chimp_ranks, + mse = chimp_mse, + cohort = as.factor("Chimpanzee")) %>% + # as multiple runs of the same rank need to join all mse values + group_by(ranks) %>% + # create a nested col with all bootstraps per rank + nest(mse_data = mse) %>% + arrange(ranks) %>% + # unlist them into one list with 100 entries # + mutate(mse_data = map(mse_data, ~unlist(.x))) 
+chimp_boot_tib + +# human boot nested tibble # +ixi_boot_tib <- tibble(ranks = ixi_ranks, + mse = ixi_mse, + cohort = as.factor("Human")) %>% + # as multiple runs of the same rank need to join all mse values + group_by(ranks) %>% + # create a nested col with all bootstraps per rank + nest(mse_data = mse) %>% + arrange(ranks) %>% + # unlist them into one list with 100 entries # + mutate(mse_data = map(mse_data, ~unlist(.x))) +ixi_boot_tib + +# calculate the difference in ranks mse of each bootstrap # +# wanted to use lag() but run into problems with nest column so will # +# just loop through the rows # + +# chimpanzees # +chimp_mse_diff <- vector(mode = "list", length = nrow(chimp_boot_tib)-1) +chimp_mse_diff_mean <- vector(length = length(chimp_mse_diff)) +chimp_mse_diff_sd <- vector(length = length(chimp_mse_diff)) + +# Humans # +ixi_mse_diff <- vector(mode = "list", length = nrow(ixi_boot_tib)-1) +ixi_mse_diff_mean <- vector(length = length(ixi_mse_diff)) +ixi_mse_diff_sd <- vector(length = length(ixi_mse_diff)) + +for (r in 1:length(chimp_mse_diff)) { + + # difference mse lists of increasing rank granularity # + chimp_mse_diff[[r]] <- chimp_boot_tib$mse_data[[r+1]] - chimp_boot_tib$mse_data[[r]] + ixi_mse_diff[[r]] <- ixi_boot_tib$mse_data[[r+1]] - ixi_boot_tib$mse_data[[r]] + + # mean and sd of mse diff # + # chimp # + chimp_mse_diff_mean[r] <- mean(chimp_boot_tib$mse_data[[r+1]] - chimp_boot_tib$mse_data[[r]]) + chimp_mse_diff_sd[r] <- sd(chimp_boot_tib$mse_data[[r+1]] - chimp_boot_tib$mse_data[[r]]) + # human # + ixi_mse_diff_mean[r] <- mean(ixi_boot_tib$mse_data[[r+1]] - ixi_boot_tib$mse_data[[r]]) + ixi_mse_diff_sd[r] <- sd(ixi_boot_tib$mse_data[[r+1]] - ixi_boot_tib$mse_data[[r]]) + +} + +# create df for plotting change in mse # + +mse_diff_df <- rbind(chimp_boot_tib, ixi_boot_tib) %>% + select(-mse_data) %>% + # create empty column for mean mse, sd and ari data # + mutate(mse_ari_dat = NA, + sd = NA) + +# input mean and sd value # +# there is no difference value for rank 2 but there is an ARI # +# very hacky !! 
# +# chimp # +mse_diff_df$mse_ari_dat[2:39] <- chimp_mse_diff_mean +mse_diff_df$sd[2:39] <- chimp_mse_diff_sd +# human # +mse_diff_df$mse_ari_dat[41:78] <- ixi_mse_diff_mean +mse_diff_df$sd[41:78] <- ixi_mse_diff_sd + +# ARI INPUT # +ari_dat <- read_rds(file = paste(path_wd(), "outputs", + "Chimp_TPM03_2_IXI_ARI.rds", sep = "/")) + +# change col names to match mse data # +colnames(ari_dat) <- c("ranks", "cohort", "mse_ari_dat") +# add sd column to match mse results with NAs +ari_dat$sd <- NA +# add manipulation for secondary axis # +ari_dat_sec <- ari_dat %>% + mutate(mse_ari_dat = map_dbl(mse_ari_dat, ~ (.x - 0.4) / 0.1)) +# join the ari and mse data for plotting # +mse_ari <- full_join(ari_dat_sec, mse_diff_df) + +# change factor levels to match other figures # +mse_ari$cohort <- factor(mse_ari$cohort, + levels = c("Chimpanzee", "Human", "ARI_IXI_Chimp")) +# rename the factor levels for better labels in plot # +levels(mse_ari$cohort) <- c("Chimpanzee", "Human", "ARI") + +# plot # +mse_diff_plot <- ggplot(mse_ari, aes(x = ranks, y = mse_ari_dat, color = cohort, + fill = cohort)) + + geom_ribbon(aes(ymin=mse_ari_dat-sd, ymax=mse_ari_dat+sd), alpha=0.3, + color = NA) + # creates cloud of +- sd + geom_line(size = 1) + + geom_point(aes(shape = cohort), size = 3) + + scale_shape_manual(values = c(15, 16, 18)) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + scale_x_continuous(limits = c(2,40),breaks = seq(2, 40, by = 2)) + + scale_y_continuous(limits = c(-4,0), breaks = seq(-4, 0, by = 0.5), + name = "Reconstruction Error Change", + sec.axis = sec_axis(~ (. *0.1) + 0.4, name = "Adjusted Rand Index")) + + geom_vline(xintercept = c(17), color = "grey", linetype = "dashed", size = 1) + + labs(x = "OPNMF Granularity", y = "Reconstruction Error Change") + + theme_classic(base_size = 25) + + theme(legend.title = element_blank(), legend.position = c(0.7, 0.2)) + +mse_diff_plot + +ggsave(paste(path_wd(), "outputs", + "Chimp_IXI_MSE_ARI_R2_40_plot.png", sep = "/")) + diff --git a/03_OPNMF/OPNMF_boot.R b/03_OPNMF/OPNMF_boot.R new file mode 100755 index 0000000..8a768d6 --- /dev/null +++ b/03_OPNMF/OPNMF_boot.R @@ -0,0 +1,69 @@ +#!/usr/bin/env Rscript + +# OPNMF on data matrix (subj x voxel) over a range of ranks or factors by steps +# of one. W = factor matrix, H = subject matrix +args = commandArgs(trailingOnly=TRUE) +### Arguments #### + +## 1: relative path to data matrix (subj x voxel) as .rds file ## +## if matrix is in wd/data/mat.rds then arg="data/mat.rds ## + +## 2: factor number for OPNMF ## + +## 3: Number of bootstraps ## + +## 4: Seed for reproducibility ## + +## INPUT: data matrix .rds file ## +## OUTPUT: OPNMF bootstrapping output data for a factor solution ## + +# !!set working directory!! 
#
+wd <- "~/projects/chimp_human_opnmf/"
+setwd(wd)
+
+# load packages
+if (!require("pacman")) install.packages("pacman")
+pacman::p_load(fs, remotes)
+# BiocManager is needed to install Biobase and then the NMF packages #
+if (!require("BiocManager", quietly = TRUE))
+  install.packages("BiocManager")
+BiocManager::install("Biobase")
+# install opnmfR package from github #
+remotes::install_github("kaurao/opnmfR")
+
+# bootstrapping perm sel function built on top of opnmfR_ranksel_perm
+# location of github repo code #
+print("load opnmfR_boot.R function")
+source(paste(path_wd(), "code", "03_OPNMF", "opnmfR_boot.R", sep = "/"))
+print("DONE!")
+
+# load input matrix as a .rds #
+input_file <- paste(path_wd(), args[1] , sep="/")
+
+print(paste0("Read input data matrix: ",input_file))
+input_mat <- readRDS(input_file)
+print("DONE!")
+
+# seed for permutation and bootstrapping for reproducibility #
+seed <- as.integer(args[4])
+print(paste("Seed:", seed, sep = " "))
+# run perm rank selection with bootstrapping of original matrix #
+print(paste0("Run OPNMF bootstrapping on factor ", args[2], " with boot = ", args[3]))
+perm_boot_sel <- opnmfR_ranksel_perm_boot(X = t(input_mat), rs = as.integer(args[2]),
+                                          W0 = "nndsvd", max.iter = 5e4,
+                                          use.rcpp = TRUE, seed = seed, nboot = as.integer(args[3]),
+                                          fact = FALSE, plots = FALSE)
+print("DONE!")
+
+# add the rank to the output list for easier post-processing #
+perm_boot_sel$rank <- as.integer(args[2])
+
+# out dir to print outputs to #
+out_dir <- paste(path_wd(), "outputs", sep="/")
+# Name string of output file #
+out_name <- paste0(args[1], "_rank_", args[2], "_nboot_", args[3],
+                   "_seed_", args[4],"_perm_boot_sel.rds")
+print(paste0("Bootstrapping data saved here: ", out_dir, "/", out_name))
+# save outputs #
+saveRDS(perm_boot_sel, file = paste(out_dir, out_name, sep="/"))
+print("DONE!")
diff --git a/03_OPNMF/OPNMF_brain_map.R b/03_OPNMF/OPNMF_brain_map.R new file mode 100644 index 0000000..84bea64 --- /dev/null +++ b/03_OPNMF/OPNMF_brain_map.R @@ -0,0 +1,147 @@
+#!/usr/bin/env Rscript
+
+# create OPNMF brain parcellations by projecting the W matrix onto volume space #
+
+args = commandArgs(trailingOnly=TRUE)
+### Arguments ####
+
+# 1: Species of OPNMF output - "chimp" or "human" #
+# 2: relative path and name of GM mask used in creating OPNMF data matrix and OPNMF solutions #
+
+# Utilising these arguments will gather the .rds files which contain the output from OPNMF_fact.R #
+# These should be in the directory 'wd/data/Species (e.g. chimp or human)/W_H/*.rds' #
+# When downloading the OPNMF solutions from (....Zenodo....)
they will have this structure for each species # + + +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) + +# load packages +if (!require("pacman")) install.packages("pacman") +pacman::p_load(fs, oro.nifti, neurobase, readr) + +print(paste0("###### WORKING DIR: ", path_wd(), " #######")) + +# load OPNMF helper functions for simpler creation of brain Niftis # +print(paste0("Source helper functions from ", + paste(path_wd(), "code", "OPNMF_util.R", sep = "/"))) +source(file = paste(path_wd(), "code", "OPNMF_util.R", sep = "/")) +print("DONE!") +# input 1: Species string for gathering OPNMF solutions and naming output niftis # +cohort <- args[1] + +# input 2: GM for projecting OPNMF solutions # +GM_mask <- paste0(path_wd(), args[2]) + +print(paste0("Read in GM mask from ", GM_mask)) +GM_mask_img <- readnii(GM_mask) +print("DONE!") + +# Name of mask to easily gather OPNMF W H outputs as this is how they are saved +# following OPNMF_fact.R e.g. "{GM_mask_name}_mat_rank_2_W_H.rds +GM_mask_name <- paste(sub(pattern = ".nii.gz", replacement = "\\1", + basename(GM_mask))) +# index GM voxels to be replaced with opnmf parcellation # +GM_ind <- c(img_data(GM_mask_img > 0)) +# create vector of GM mask # +GM_vol <- c(img_data(GM_mask_img)) +# volume for wrting out OPNMF parcellation +out_vol <- GM_vol + +# Create species specific images with specific file path and file names # +if (cohort == "chimp") { + + # path to WH data # + W_H_path <- paste(path_wd(), "data", cohort, "W_H", sep = "/") + # name of W_H .rds files # + W_H_names <- list.files(path = W_H_path, + pattern = glob2rx(pattern = "chimp_cort*.rds")) + + # create parcallation output dir + parc_dir <- paste(path_wd(), "data", cohort, "opnmf_parc", sep="/") + # if outputs dir doesn't exists create it # + if (!dir_exists(parc_dir)) { + dir_create(parc_dir) + print(paste0("created ", parc_dir)) + } else { + print(paste0(parc_dir, " already created")) + } + + # create OPNMF brain images using WH data in dir # + # Only create images that have not been created at "path_wd()/dat/chimp/opnmf_parc/" + + for (i in 1:length(W_H_names)) { + + # volume for wrting out OPNMF parcellation + parc_vol <- GM_vol + + # W_H data for creating brain projection # + W_H_data <- readRDS(file = paste(W_H_path, W_H_names[i], sep = "/")) + + # name of output parcellation brain # + parc_name <- paste(parc_dir, paste0("Chimp_cort_n", ncol(W_H_data$H), + "_TPM03_rank_", ncol(W_H_data$W), ".nii.gz"), sep = "/") + + if (!file_exists(parc_name)) { + # check if image has already been mapped out # + parc_vol[GM_ind] <- apply(W_H_data$W, 1, which.max) + + # write out image # + print(paste0("create ", parc_name)) + vol_2_img(vol_vec = parc_vol, img_info = GM_mask_img, + img_name = basename(parc_name), location = parc_dir) + print("DONE!") + } else { + print(paste0(parc_name, " already created")) + } + + } + +} else if (cohort == "human") { + W_H_path <- paste(path_wd(), "data", cohort, "W_H", sep = "/") + W_H_names <- list.files(path = W_H_path, + pattern = glob2rx(pattern = "IXI_cort*.rds")) + + # create parcallation output dir + parc_dir <- paste(path_wd(), "data", cohort, "opnmf_parc", sep="/") + + # if outputs dir doesn't exists create it # + if (!dir_exists(parc_dir)) { + dir_create(parc_dir) + print(paste0("created ", parc_dir)) + } else { + print(paste0(parc_dir, " already created")) + } + + # create OPNMF brain images using WH data in dir # + # Only create images that have not been created at "path_wd()/dat/chimp/opnmf_parc/" + + for (i in 1:length(W_H_names)) { + + # volume 
for wrting out OPNMF parcellation + parc_vol <- GM_vol + + # W_H data for creating brain projection # + W_H_data <- readRDS(file = paste(W_H_path, W_H_names[i], sep = "/")) + + # name of output parcellation brain # + parc_name <- paste(parc_dir, paste0("IXI_cort_n", ncol(W_H_data$H), + "_rank_", ncol(W_H_data$W), ".nii.gz"), + sep = "/") + + if (!file_exists(parc_name)) { + # check if image has already been mapped out # + parc_vol[GM_ind] <- apply(W_H_data$W, 1, which.max) + + # write out image # + print(paste0("create ", parc_name)) + vol_2_img(vol_vec = parc_vol, img_info = GM_mask_img, + img_name = basename(parc_name), location = parc_dir) + print("DONE!") + } else { + print(paste0(parc_name, " already created")) + } + } +} else { + print("!! ERROR don't recognise species given should be chimp or human !!") +} diff --git a/03_OPNMF/OPNMF_fact.R b/03_OPNMF/OPNMF_fact.R new file mode 100755 index 0000000..f2a4334 --- /dev/null +++ b/03_OPNMF/OPNMF_fact.R @@ -0,0 +1,67 @@ +#!/usr/bin/env Rscript + +# OPNMF on data matrix (subj x voxel) over a range of ranks or factors by steps +# of one. W = factor matrix, H = subject matrix +args = commandArgs(trailingOnly=TRUE) +### Arguments #### + +## 1: relative path to data matrix (subj x voxel) as .rds file ## +## if matrix is in wd/data/mat.rds then arg="data/mat.rds ## + +## 2: Number of ranks or factors for OPNMF ## + +## INPUT: data matrix .rds file ## +## OUTPUT: OPNMF solutions over range selected as list .rds file ## + +# !!set working directory!! # +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) + +# load packages +if (!require("pacman")) install.packages("pacman") +pacman::p_load(fs, remotes) +# Biocmanager is needed to install Biobase and then NMF packages # +if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +install.packages("Biobase") +# install opnmfR package from github # +# might be better to remotes::install_local() after downloading the repo with +# Microsoft open R # +remotes::install_github("kaurao/opnmfR") + +# input data matrix file (.rds #) +input_file <- paste(path_wd(), args[1] , sep="/") +print(paste0("read iput matrix: ", input_file)) +input_mat <- readRDS(input_file) +print("DONE!") +# nput file name for naming OPNMF output .rds fle ' +input_name <- paste(sub(pattern = ".rds", replacement = "\\1", + basename(input_file))) + +# factors to be investigated # +rank <- as.integer(args[2]) +# Empty list for the different OPNMF rank solutions +W_H <- vector(mode = "list", length = length(rank)) + +print(paste0("Run OPNMF with ", rank, " factors")) +# opnmfRcpp is used for improved compute speed # +W_H <- opnmfR::opnmfRcpp(X = t(input_mat), r = rank, + W0 = "nndsvd", max.iter = 5e4) +print("DONE!") + +out_dir <- paste(path_wd(), "outputs", sep="/") +# if outputs dir doesn't exists create it # +if (!dir_exists(out_dir)) { + dir_create(out_dir) + print(paste0("created ", out_dir)) +} else { + print(paste0(out_dir, " already created")) +} + +## OUTPUT ## +# save OPNMF solutions to outputs # +print(paste0("Save OPNMF W H matrix here: ", out_dir)) +saveRDS(W_H, file = paste(out_dir, + paste(input_name, "rank", args[2], "W_H.rds", sep="_"), + sep = "/")) +print("DONE!") \ No newline at end of file diff --git a/03_OPNMF/README.md b/03_OPNMF/README.md new file mode 100644 index 0000000..b85faa2 --- /dev/null +++ b/03_OPNMF/README.md @@ -0,0 +1,7 @@ +# Orthogonal Projective Non-Negative Matrix Factorization (OPNMF) + +The scripts should be run in sequential order to establish OPNMF parcellations for 
chimpanzees and humans, conduct ARI (adjusted rand index) comparison, bootstrapping, and finally create the granularity selection plot (Fig. 2A in article). Additionally ```parcel_ari.R``` can be used to create the parcel-wise ARI brain map by projecting the highest cross-species ARI per parcel onto the human MNI template space. + +OPNMF is conducted using this repo (https://github.com/kaurao/opnmfR) which needs to be installed using ```remotes::install_github("kaurao/opnmfR")```. For bootstrapping (```run_OPMNF_boot.sh```) the ```opnmfR_boot.R``` function is used. + +OPNMF solutions for the chimpanzee (n189) and IXI (n480) with granularities of 2 - 40 can be downlaoded [here](https://zenodo.org/record/7116203#.YzLvCfexWV4). Additionally the output of all 100 bootstraps (without the parcellations) is also provided. diff --git a/03_OPNMF/opnmfR_boot.R b/03_OPNMF/opnmfR_boot.R new file mode 100755 index 0000000..002c9e8 --- /dev/null +++ b/03_OPNMF/opnmfR_boot.R @@ -0,0 +1,113 @@ + +opnmfR_ranksel_perm_boot <- function(X, rs, W0=NULL, use.rcpp=TRUE, nperm=1, boot=TRUE, nboot=1, + plots=TRUE, fact=TRUE, seed=NA, rtrue=NA, ...) { + stopifnot(ncol(X)>=max(rs)) + start_time <- Sys.time() + + # original data + cat("original data... ") + nn <- list() + mse <- list() + if(is.na(seed)) seed <- sample(1:10^6, 1) + if(boot) { + mse <- vector(mode = "list", length = length(rs)) + + for (b in 1:nboot) { + set.seed(seed+b) + nn[[b]] <- list() + Xboot <- X[ ,sample(ncol(X), replace = TRUE)] + for (r in 1:length(rs)) { + if(use.rcpp) { + + nn[[b]]$boot[[r]] <- opnmfR::opnmfRcpp(Xboot, rs[r], W0=W0, ...) + } else { + + nn[[b]]$boot[[r]] <- opnmfR::opnmfR(Xboot, rs[r], W0=W0, ...) + } + + cat("orig", "rank:", r, "Bootstrap", b, "iter:", nn[[b]]$boot[[r]]$iter, + "time:", nn[[b]]$boot[[r]]$time, "diffW:", nn[[b]]$boot[[r]]$diffW, "\n") + #mse[[r]] <- list() + #mse[[r]]$orig <- rep(NA,length(nboot)) + mse[[r]]$orig[b] <- opnmfR::opnmfR_mse(Xboot, nn[[b]]$boot[[r]]$W, nn[[b]]$boot[[r]]$H) + } + } + cat("done\n") + + } else { + + for(r in 1:length(rs)) { + if(use.rcpp) { + nn[[r]] <- opnmfR::opnmfRcpp(X, rs[r], W0=W0, ...) + } else { + nn[[r]] <- opnmfR::opnmfR(X, rs[r], W0=W0, ...) + } + + cat("orig", "rank:", r, "iter:", nn[[r]]$iter, "time:", nn[[r]]$time, + "diffW:", nn[[r]]$diffW, "\n") + + mse[[r]] <- list() + mse[[r]]$orig <- opnmfR::opnmfR_mse(X, nn[[r]]$W, nn[[r]]$H) + } + + } + cat("done\n") + + # permuted data + cat("permuted data... ") + for(p in 1:nperm) { + set.seed(seed+p) + Xperm <- apply(X,2,sample) # permute the rows in each column + for(r in 1:length(rs)) { + if(use.rcpp) { + nnp <- opnmfR::opnmfRcpp(Xperm, rs[r], W0=W0, ...) + } else { + nnp <- opnmfR::opnmfR(Xperm, rs[r], W0=W0, ...) 
+ } + + cat("perm", p, "rank:", r, "iter:", nnp$iter, "time:", nnp$time,"diffW:", nnp$diffW, "\n") + mse[[r]]$perm <- c(mse[[r]]$perm, opnmfR_mse(Xperm, nnp$W, nnp$H)) + } + } + cat("done\n") + + names(mse) <- rs + names(nn) <- rs + if(boot) { + mseorig <- sapply(mse, function(xx) mean(xx$orig)) + } else { + mseorig <- sapply(mse, function(xx) xx$orig) + } + + mseperm <- sapply(mse, function(xx) mean(xx$perm)) + mseorig <- mseorig / max(mseorig) + mseperm <- mseperm / max(mseperm) + + sel <- which(diff(mseorig) > diff(mseperm)) + selr <- rs[sel] + + if(plots) { + maxi <- max(c(mseorig, mseperm)) + mini <- min(c(mseorig, mseperm)) + + plot(NA, xlim=c(min(rs),max(rs)), ylim=c(mini,maxi), xlab="Rank", ylab="MSE (scaled)") + points(rs, mseorig, type='b', pch=16) + points(rs, mseperm, type='b', pch=17, lty=2) + points(selr, mseorig[sel], cex=2, pch=1, lwd=2, col="red") + if(!is.na(rtrue)) abline(v=rtrue, lty=2, col="gray") + legend("bottomleft", legend = c("Orig.","Perm."), pch=16:17) + title(main="Permutation", sub=paste("Selected rank = ", min(selr))) + } + + if(fact) { + fact <- nn + } else { + fact <- nn[sel] + } + + end_time <- Sys.time() + tot_time <- end_time - start_time + + return(list(mse=mse, selected=selr, factorization=fact, time=tot_time, seed=seed)) + +} \ No newline at end of file diff --git a/03_OPNMF/parcel_ari.R b/03_OPNMF/parcel_ari.R new file mode 100644 index 0000000..b9f80cb --- /dev/null +++ b/03_OPNMF/parcel_ari.R @@ -0,0 +1,110 @@ +#### Single parcel ARI comparison between chimp and human 17-factor solutions #### +## creates Fig. 2B volume nifti ## +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) + +if (!require("pacman")) install.packages("pacman") +pacman::p_load(oro.nifti, fs, neurobase, aricode, reshape2, ggplot2, dplyr) +# Helper script with useful functions # +source(file = paste(path_wd(), "code", "OPNMF_util.R", sep = "/")) + +# path to human opnmf solutions # +human_path <- paste(path_wd(), "data", "IXI", "opnmf_parc", sep = "/") +# path to chimpanzee opnmf solutions # +chimp_path <- paste(path_wd(), "data", "chimp", "opnmf_parc", sep = "/") + +# Read in human and chimp opnmf solutions for single ARI evaluation # +## INPUT 1: Human OPNMF 17-factor solution ## +# Parcellation image median filtered to minimize slight morphology differences # +human_img <- readnii(paste(human_path, "IXI_cort_n480_rank_17.nii.gz", + sep = "/")) +img_A <- human_img + +# INPUT 2: Chimp parcellation in MNI space # +chimp_img <- readnii(paste(chimp_path, + "whimp_cortex_n189_TPM03_rank_17_num_match.nii.gz", + sep = "/")) +img_B <- chimp_img + +# empty list to be filled with single component niftis from both images # +A_imgs <- vector(mode = "list", length = length(unique(c(img_data(img_A))))-1) +B_imgs <- vector(mode = "list", length = length(unique(c(img_data(img_B))))-1) +# IF I want to make this more useable can add an if statement to see if the two +# images have the same amount of components and then do different loops for each +# option # +for (i in 1:length(A_imgs)) { + + A_imgs[[i]] <- mask_img(img_A, img_A == i) + B_imgs[[i]] <- mask_img(img_B, img_B == i) + +} + +# empty matrix to be filled with ari values # +ari_mat <- matrix(NA, nrow = length(A_imgs), ncol = length(B_imgs)) +for (k in 1:nrow(ari_mat)) { + for (j in 1:ncol(ari_mat)) { + # calculate ari for each parcel compared to all parcels of other species # + # use human solution as mask b/c it is slightly smaller # + ari_mat[k,j] <- parc_sim_ARI(A_imgs[[k]], B_imgs[[j]], human_img) + } +} + +comps <- 
1:(length(unique(c(img_data(img_A))))-1) +ari_df <- data.frame(comps = as.factor(comps),rbind(ari_mat)) +#colnames(ari_df) <- c("comps",paste("comp", 1:ncol(ari_mat), sep = "_")) +colnames(ari_df) <- c("comps", 1:17) +ari_dat <- melt(ari_df, id.vars = 1) +# create a thresholded df for plot labels +ari_thr <- ari_dat %>% + filter(., value > 0.15) %>% + rename(., value_thr = value) +# join data frame where less the thr vlaues are now NA's +ari_dat_1 <- full_join(ari_dat, ari_thr) + +# create heat map of ARI parcel values # +ggplot(ari_dat_1, aes(x=comps, y=variable)) + + geom_tile(aes(fill = value)) + + geom_text(aes(label = round(value_thr, 2))) + + scale_fill_viridis_b(option = "plasma") + # viridis or magma + labs(x = "Chimpanzee Parcel Number", + y = "IXI Parcel Number", + fill = "ARI") + + theme_classic(base_size = 20) + + theme(line = element_blank()) +## OUTPUT 1: Heatmap matrix of parcel-wise ARI similarity ### +ggsave(paste(path_wd(), "data","IXI_n480_chimp_n189_TPM03_sing_rank17_ari.png", + sep = "/")) + +#### Create ARI brain plot by using max ARI for each parcel & plot of IXI 17 #### + +# row-wise max values represent the max ari for each IXI 17-factor parcel +human_max <- apply(ari_mat, 1, max) + +# create human img vector volume # +human_vol <- c(img_data(human_img)) +# create volume index # +human_ind <- human_vol > 0 +# create GM mask human image # +human_GM <- human_vol[human_ind] +# create output human volume for ARI image # +human_out_GM <- human_GM + +# loop over parcel numbers and change to matched numbers with IXI solution # +for (i in 1:length(human_max)) { + + # index for chimp in mni space # + comp_ind <- human_GM == i + + # input ARI values for each parcel # + human_out_GM[comp_ind] <- human_max[i] + +} + +# Output 2: Parcel-wise ARI plotted on IXI 17-factor solution # +# name of image # +out_name_ari <- paste("IXI_chimp_17C_parcel_ari_MNI", sep = "") + +#out_vol_mni <- chimp_vol #chimp_vol MNI space +human_vol[human_ind] <- human_out_GM +vol_2_img(vol_vec = human_vol, img_info = human_img, location = human_path, + img_name = out_name_ari) diff --git a/04_Age_Expansion_Analysis/01_run_age_def_analyses.sh b/04_Age_Expansion_Analysis/01_run_age_def_analyses.sh new file mode 100644 index 0000000..ff421db --- /dev/null +++ b/04_Age_Expansion_Analysis/01_run_age_def_analyses.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# +# Run script to conduct age - expansion analyses # +# using parcellation (OPNMF or Davi130) # + +# The R script requires 5 input arguments # +### 1 - Working directory (wd) where relative paths can be used to find files +### 2 - Cohort investigated +### 3 - Parcellation Image used for analyses +### 4 - Deformation field (Inter-species) Nifti relaltive path from wd +### 5 - name of phenotype meta data with subj names, Age, Sex, Scanner, TIV + +# 7 outputs are created and saved in wd/outputs/ # +### OUTPUT 1 - Meta data of sample with comp mean grey matter vol (csv) +### OUTPUT 2 - output statistics from component wise age regression (csv) +### OUTPUT 3 - T-statistic from age reg projected onto brain (nifti) +### OUTPUT 4 - ONLY significant FWE T-statistic from age reg projected onto brain (nifti) +### OUTPUT 5 - Average deformation per parcellation component image (nifti) +### OUTPUT 6 - deformation & T-statistic data fro post-hoc comparison (csv) +### OUTPUT 7 - Z scored parcellation expanion map (nifti) + +# Name of R script # +r_script=age_def_analyses.R + +# wd # +working_dir="~/projects/chimp_human_opnmf/" + +# cohort investigated - "chimp", "IXI", or "eNKI" 
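+# The values below configure the chimpanzee run. As an illustration only (file
+# names are taken from comments elsewhere in this repo and may need adjusting),
+# a human (IXI) run could instead set:
+#   cohort="IXI"
+#   parc="IXI_cort_n480_rank_17.nii.gz"
+#   exp_map="JunaChimp_expansionMNI_3mm.nii.gz"
+#   meta_sample="IXI_n304_U58_meta.csv"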
+cohort="chimp" + +# path to GM mask # +parc="chimp_cortex_n189_TPM03_rank_17_num_match.nii.gz" + +# Cross-species expansion mapp +exp_map="Haiko2Juna_expansion_0.08_0.8_2mm.nii.gz" + +# Name of meta data csv file to gather sample for analyses & age reg model # +meta_sample="chimp_meta_QC_n189.csv" + +# run R script +Rscript $r_script $working_dir $cohort $parc $exp_map $meta_sample +echo Conduction age & expansion analyses using $parc parcellation and $exp_map expasnion map on $cohort sample + + diff --git a/04_Age_Expansion_Analysis/02a_age_def_compare.R b/04_Age_Expansion_Analysis/02a_age_def_compare.R new file mode 100644 index 0000000..74a1df0 --- /dev/null +++ b/04_Age_Expansion_Analysis/02a_age_def_compare.R @@ -0,0 +1,207 @@ +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) +# load in libraries needed for script # +# need to install pacman if not already installed +if (!require("pacman")) install.packages("pacman") +pacman::p_load(fs, ggplot2, dplyr, tidyr, reshape2, readr, + broom, tibble, MASS, sfsmisc, cocor, oro.nifti, neurobase, ggrepel) + +#### INPUTS #### +# path to data - which is the outputs folder from opnmf_age_def.R script # +dat_path <- paste(path_wd(), "outputs", sep = "/") + +## INPUT 1: chimp - baboon (haiko) age def OPNMF csv ## +chimp_B_dat <- read_csv(file = paste(dat_path, + "Chimp_meta_QC_u50_n189_Rank_17Chimp_cort_n189_TPM03_rank_17_num_match_Haiko2Juna_expansion.csv", + sep = "/")) %>% + mutate(statistic = abs(statistic), + variable = as.factor(1:17)) %>% # make T-stat positive for easier plotting and explaining + add_column(Species = as.factor("Chimpanzee(Baboon)")) %>% + rename(factor = variable) + +## INPUT 2: chimp - Macaque (MeanMacaque) age def OPNMF csv ## +chimp_M_dat <- read_csv(file = paste(dat_path, + "Chimp_meta_QC_u50_n189_Rank_17Chimp_cort_n189_TPM03_rank_17_num_match_MeanMacaque2Juna_expansion.csv", + sep = "/")) %>% + mutate(statistic = abs(statistic), + variable = as.factor(1:17)) %>% # make T-stat positive for easier plotting and explaining + add_column(Species = as.factor("Chimpanzee(Macaque)")) %>% + rename(factor = variable) + + +## INPUT 3: Human (IXI) - Chimp age def OPNMF csv ## +human_dat <- read_csv(file = paste(dat_path, + "IXI_n304_U58_meta_Rank_17IXI_cort_n480_rank_17_Juna2MNI_expansion.csv", + sep = "/")) %>% + mutate(statistic = abs(statistic), + variable = as.factor(1:17)) %>% # make T-stat positive for easier plotting and explaining + add_column(Species = as.factor("Human")) %>% + rename(factor = variable) + +# df with both chimp data +chimp_MB_dat <- dplyr::full_join(chimp_B_dat, chimp_M_dat) + +#### Compare aging and expansion using permutation testing ##### +nperm <- 10e4 +### Should turn perm test into a simple function ### + +## Species spereate ## +## HUMAN ## + +# empty list for human perm correlation results # +perm_res_H <- vector("list", nperm) +set.seed(47) +for (perm_H in 1:nperm) { + + perm_res_H[[perm_H]] <- cor(x=human_dat$statistic, y=sample(human_dat$zscore_def), + method = "pearson") +} + +H_cor <- cor(human_dat$statistic, human_dat$zscore_def) + +perm_res_df_H <- data.frame(perm_R = unlist(perm_res_H)) + +h_p_val <- sum(unlist(perm_res_H) >= H_cor ) / nperm + +ggplot(perm_res_df_H, aes(perm_R)) + + geom_density(color="#CC79A7",fill="#CC79A7") + + geom_vline(xintercept = H_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=-0.75, y=c(1.3, 1.2), color = "#CC79A7", 
+ label = c(paste("italic(r) == ", round(H_cor, 2), sep=""), + paste("italic(p) == ", sprintf(round(h_p_val,3), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "IXI_N480_J2M_Rank17_R_perm_10e4_IXI304.png", + sep = "/")) + +## CHIMPNAZEE ## + +# Baboon(Haiko) template -> Juna expansion # + +perm_res_C_B <- vector("list", nperm) +set.seed(47) +for (perm_C in 1:nperm) { + + perm_res_C_B[[perm_C]] <- cor(x=chimp_B_dat$statistic, y=sample(chimp_B_dat$zscore_def), + method = "pearson") +} +# correlation between aging and baboon - chimp expansion of across OPNMF factors # +C_B_cor <- cor(chimp_B_dat$statistic, chimp_B_dat$zscore_def) +# mnake a df fr easier ploting # +perm_res_df_C_B <- data.frame(perm_R = unlist(perm_res_C_B)) +# gather p-value +C_B_pval <- sum(unlist(perm_res_C_B) <= C_B_cor ) / nperm + +ggplot(perm_res_df_C_B, aes(perm_R)) + + geom_density(color= "#E1BE6A", fill= "#E1BE6A") + + geom_vline(xintercept = C_B_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=0.75, y=c(1.3, 1.2), color = "#E1BE6A", + label = c(paste("italic(r) == ", round(C_B_cor, 3), sep=""), + paste("italic(p) == ", sprintf(round(C_B_pval,2), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "Chimp_N189_H2J_Rank17_R_perm_10e4.png", + sep = "/")) + +# Macaque(MeanMacaque) template -> Juna expansion # + +perm_res_C_M <- vector("list", nperm) +set.seed(47) +for (perm_C in 1:nperm) { + + perm_res_C_M[[perm_C]] <- cor(x=chimp_M_dat$statistic, y=sample(chimp_M_dat$zscore_def), + method = "pearson") +} +# correlation between aging and baboon - chimp expansion of across OPNMF factors # +C_M_cor <- cor(chimp_M_dat$statistic, chimp_M_dat$zscore_def) +# mnake a df fr easier ploting # +perm_res_df_C_M <- data.frame(perm_R = unlist(perm_res_C_M)) +# gather p-value +C_M_pval <- sum(unlist(perm_res_C_M) <= C_M_cor ) / nperm + +ggplot(perm_res_df_C_M, aes(perm_R)) + + geom_density(color= "#56B4E9", fill= "#56B4E9") + + geom_vline(xintercept = C_M_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=0.75, y=c(1.3, 1.2), color = "#56B4E9", + label = c(paste("italic(r) == ", round(C_M_cor, 2), sep=""), + paste("italic(p) == ", sprintf(round(C_M_pval,2), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "Chimp_N189_MM2J_Rank17_R_perm_10e4.png", + sep = "/")) + +#### Plot the comparison of aging and expansion #### + +# HUMAN PLOT # +labels_select <- c(5, 5+17, 3, 3+17, 4, 4+17, 16, 16+17, 15, 15+17, + 8, 8+17, 13, 13+17, 17, 17+17) +# create a col for labels in plot # +human_dat$labels_sel <- as.character(human_dat$factor) + +human_dat$labels_sel[-labels_select] <- "" + + +ggplot(human_dat, aes(x = statistic, y = zscore_def, color=Species, + label = labels_sel)) + + geom_point(size = 4.5) + + scale_x_continuous(limits = c(2, 16), breaks = seq(2, 16, 2)) + + scale_y_continuous(limits = c(-3, 3), breaks = seq(-3, 3, 1)) + + geom_smooth(aes(fill = Species),formula = y ~ x, method = "rlm", size = 1.5, alpha = 0.3) + + geom_text_repel(size = 10, box.padding = 3, nudge_x = 0.5) + + theme_bw() + + scale_fill_manual(values = "#CC79A7") + + scale_color_manual(values = "#CC79A7") + + labs(x = "Age - Gray Matter model 
(t-statistic)", + y = "Cross-species Expansion (Z-scale)") + + theme_classic(base_size = 25) + + theme(legend.position = "none") + + +ggsave(filename = paste(dat_path, + "IXI_n480_J2M_rank17_exp_age_IXI304.png", + sep = "/")) + +# Chimpanzee aging compared to both macaque and baboon expansion plot # + +# create a col for labels in plot # +chimp_MB_dat$labels_sel <- as.character(chimp_MB_dat$factor) + +chimp_MB_dat$labels_sel[-labels_select] <- "" + +ggplot(chimp_MB_dat, aes(x = statistic, y = zscore_def, + color=Species, label = labels_sel)) + + geom_point(size = 4.5) + + scale_x_continuous(limits = c(1, 8), breaks = seq(1, 8, 1)) + + scale_y_continuous(limits = c(-3, 3), breaks = seq(-3, 3, 1)) + + geom_smooth(aes(fill = Species),formula = y ~ x, method = "rlm", size = 1.5, alpha = 0.3) + + geom_text_repel(size = 10, box.padding = 1) + + theme_bw() + + scale_fill_manual(values = c("#E1BE6A", "#56B4E9")) + + scale_color_manual(values = c("#E1BE6A", "#56B4E9")) + + labs(x = "Age - Gray Matter model (t-statistic)", + y = "Cross-species Expansion (Z-scale)") + + theme_classic(base_size = 25) + + theme(legend.position = "none") + +# save plot # +ggsave(filename = paste(dat_path, + "Chimp_n189_H2J_M2J_rank17_exp_age.png", + sep = "/")) diff --git a/04_Age_Expansion_Analysis/02b_age_def_compare_eNKI.R b/04_Age_Expansion_Analysis/02b_age_def_compare_eNKI.R new file mode 100644 index 0000000..50c5909 --- /dev/null +++ b/04_Age_Expansion_Analysis/02b_age_def_compare_eNKI.R @@ -0,0 +1,207 @@ +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) +# load in libraries needed for script # +# need to install pacman if not already installed +if (!require("pacman")) install.packages("pacman") +pacman::p_load(fs, ggplot2, dplyr, tidyr, reshape2, readr, + broom, tibble, MASS, sfsmisc, cocor, oro.nifti, neurobase, ggrepel) + +#### INPUTS #### +# path to data - which is the outputs folder from opnmf_age_def.R script # +dat_path <- paste(path_wd(), "outputs", sep = "/") + +## INPUT 1: chimp - baboon (haiko) age def OPNMF csv ## +chimp_B_dat <- read_csv(file = paste(dat_path, + "Chimp_meta_QC_u50_n189_Rank_17Chimp_cort_n189_TPM03_rank_17_num_match_Haiko2Juna_expansion.csv", + sep = "/")) %>% + mutate(statistic = abs(statistic), + variable = as.factor(1:17)) %>% # make T-stat positive for easier plotting and explaining + add_column(Species = as.factor("Chimpanzee(Baboon)")) %>% + rename(factor = variable) + +## INPUT 2: chimp - Macaque (MeanMacaque) age def OPNMF csv ## +chimp_M_dat <- read_csv(file = paste(dat_path, + "Chimp_meta_QC_u50_n189_Rank_17Chimp_cort_n189_TPM03_rank_17_num_match_MeanMacaque2Juna_expansion.csv", + sep = "/")) %>% + mutate(statistic = abs(statistic), + variable = as.factor(1:17)) %>% # make T-stat positive for easier plotting and explaining + add_column(Species = as.factor("Chimpanzee(Macaque)")) %>% + rename(factor = variable) + + +## INPUT 3: Human (IXI) - Chimp age def OPNMF csv ## +human_dat <- read_csv(file = paste(dat_path, + "eNKI_meta_n474_Rank_17IXI_cort_n480_rank_17_Juna2MNI_expansion.csv", + sep = "/")) %>% + mutate(statistic = abs(statistic), + variable = as.factor(1:17)) %>% # make T-stat positive for easier plotting and explaining + add_column(Species = as.factor("Human")) %>% + rename(factor = variable) + +# df with both chimp data +chimp_MB_dat <- dplyr::full_join(chimp_B_dat, chimp_M_dat) + +#### Compare aging and expansion using permutation testing ##### +nperm <- 10e4 +### Should turn perm test into a simple function ### + +## Species spereate ## +## HUMAN ## + +# empty 
list for human perm correlation results # +perm_res_H <- vector("list", nperm) +set.seed(47) +for (perm_H in 1:nperm) { + + perm_res_H[[perm_H]] <- cor(x=human_dat$statistic, y=sample(human_dat$zscore_def), + method = "pearson") +} + +H_cor <- cor(human_dat$statistic, human_dat$zscore_def) + +perm_res_df_H <- data.frame(perm_R = unlist(perm_res_H)) + +h_p_val <- sum(unlist(perm_res_H) >= H_cor ) / nperm + +ggplot(perm_res_df_H, aes(perm_R)) + + geom_density(color="#CC79A7",fill="#CC79A7") + + geom_vline(xintercept = H_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=-0.75, y=c(1.3, 1.2), color = "#CC79A7", + label = c(paste("italic(r) == ", round(H_cor, 2), sep=""), + paste("italic(p) == ", sprintf(round(h_p_val,3), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "IXI_N480_J2M_Rank17_R_perm_10e4_CamCAN651.png", + sep = "/")) + +## CHIMPNAZEE ## + +# Baboon(Haiko) template -> Juna expansion # + +perm_res_C_B <- vector("list", nperm) +set.seed(47) +for (perm_C in 1:nperm) { + + perm_res_C_B[[perm_C]] <- cor(x=chimp_B_dat$statistic, y=sample(chimp_B_dat$zscore_def), + method = "pearson") +} +# correlation between aging and baboon - chimp expansion of across OPNMF factors # +C_B_cor <- cor(chimp_B_dat$statistic, chimp_B_dat$zscore_def) +# mnake a df fr easier ploting # +perm_res_df_C_B <- data.frame(perm_R = unlist(perm_res_C_B)) +# gather p-value +C_B_pval <- sum(unlist(perm_res_C_B) <= C_B_cor ) / nperm + +ggplot(perm_res_df_C_B, aes(perm_R)) + + geom_density(color= "#E1BE6A", fill= "#E1BE6A") + + geom_vline(xintercept = C_B_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=0.75, y=c(1.3, 1.2), color = "#E1BE6A", + label = c(paste("italic(r) == ", round(C_B_cor, 3), sep=""), + paste("italic(p) == ", sprintf(round(C_B_pval,2), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "Chimp_N189_H2J_Rank17_R_perm_10e4.png", + sep = "/")) + +# Macaque(MeanMacaque) template -> Juna expansion # + +perm_res_C_M <- vector("list", nperm) +set.seed(47) +for (perm_C in 1:nperm) { + + perm_res_C_M[[perm_C]] <- cor(x=chimp_M_dat$statistic, y=sample(chimp_M_dat$zscore_def), + method = "pearson") +} +# correlation between aging and baboon - chimp expansion of across OPNMF factors # +C_M_cor <- cor(chimp_M_dat$statistic, chimp_M_dat$zscore_def) +# mnake a df fr easier ploting # +perm_res_df_C_M <- data.frame(perm_R = unlist(perm_res_C_M)) +# gather p-value +C_M_pval <- sum(unlist(perm_res_C_M) <= C_M_cor ) / nperm + +ggplot(perm_res_df_C_M, aes(perm_R)) + + geom_density(color= "#56B4E9", fill= "#56B4E9") + + geom_vline(xintercept = C_M_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=0.75, y=c(1.3, 1.2), color = "#56B4E9", + label = c(paste("italic(r) == ", round(C_M_cor, 2), sep=""), + paste("italic(p) == ", sprintf(round(C_M_pval,2), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "Chimp_N189_MM2J_Rank17_R_perm_10e4.png", + sep = "/")) + +#### Plot the comparison of aging and expansion #### + +# HUMAN PLOT # +labels_select <- c(5, 5+17, 3, 
3+17, 4, 4+17, 16, 16+17, 15, 15+17, + 8, 8+17, 13, 13+17, 17, 17+17) +# create a col for labels in plot # +human_dat$labels_sel <- as.character(human_dat$factor) + +human_dat$labels_sel[-labels_select] <- "" + + +ggplot(human_dat, aes(x = statistic, y = zscore_def, color=Species, + label = labels_sel)) + + geom_point(size = 4.5) + + #scale_x_continuous(limits = c(2, 16), breaks = seq(2, 16, 2)) + + scale_y_continuous(limits = c(-3, 3), breaks = seq(-3, 3, 1)) + + geom_smooth(aes(fill = Species),formula = y ~ x, method = "rlm", size = 1.5, alpha = 0.3) + + geom_text_repel(size = 10, box.padding = 3, nudge_x = 0.5) + + theme_bw() + + scale_fill_manual(values = "#CC79A7") + + scale_color_manual(values = "#CC79A7") + + labs(x = "Age - Gray Matter model (t-statistic)", + y = "Cross-species Expansion (Z-scale)") + + theme_classic(base_size = 25) + + theme(legend.position = "none") + + +ggsave(filename = paste(dat_path, + "IXI_n480_J2M_rank17_exp_age_eNKI474.png", + sep = "/")) + +# Chimpanzee aging compared to both macaque and baboon expansion plot # + +# create a col for labels in plot # +chimp_MB_dat$labels_sel <- as.character(chimp_MB_dat$factor) + +chimp_MB_dat$labels_sel[-labels_select] <- "" + +ggplot(chimp_MB_dat, aes(x = statistic, y = zscore_def, + color=Species, label = labels_sel)) + + geom_point(size = 4.5) + + scale_x_continuous(limits = c(1, 8), breaks = seq(1, 8, 1)) + + scale_y_continuous(limits = c(-3, 3), breaks = seq(-3, 3, 1)) + + geom_smooth(aes(fill = Species),formula = y ~ x, method = "rlm", size = 1.5, alpha = 0.3) + + geom_text_repel(size = 10, box.padding = 1) + + theme_bw() + + scale_fill_manual(values = c("#E1BE6A", "#56B4E9")) + + scale_color_manual(values = c("#E1BE6A", "#56B4E9")) + + labs(x = "Age - Gray Matter model (t-statistic)", + y = "Cross-species Expansion (Z-scale)") + + theme_classic(base_size = 25) + + theme(legend.position = "none") + +# save plot # +ggsave(filename = paste(dat_path, + "Chimp_n189_H2J_M2J_rank17_exp_age.png", + sep = "/")) diff --git a/04_Age_Expansion_Analysis/02c_age_def_compare_davi.R b/04_Age_Expansion_Analysis/02c_age_def_compare_davi.R new file mode 100644 index 0000000..c60ca25 --- /dev/null +++ b/04_Age_Expansion_Analysis/02c_age_def_compare_davi.R @@ -0,0 +1,203 @@ +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) +# load in libraries needed for script # +# need to install pacman if not already installed +if (!require("pacman")) install.packages("pacman") +pacman::p_load(fs, ggplot2, dplyr, tidyr, reshape2, readr, + broom, tibble, MASS, sfsmisc, cocor, oro.nifti, neurobase, ggrepel) + +#### INPUTS #### +# path to data - which is the outputs folder from opnmf_age_def.R script # +dat_path <- paste(path_wd(), "outputs", sep = "/") + +## INPUT 1: chimp - baboon (haiko) age def OPNMF csv ## +chimp_B_dat <- read_csv(file = paste(dat_path, + "Chimp_meta_QC_u50_n189_Rank_110Davi130_2mm_cortex.nii_Haiko2Juna_expansion.csv", + sep = "/")) %>% + mutate(variable = as.factor(1:nrow(.))) %>% # + add_column(Species = as.factor("Chimpanzee(Baboon)")) %>% + rename(factor = variable) + +## INPUT 2: chimp - Macaque (MeanMacaque) age def OPNMF csv ## +chimp_M_dat <- read_csv(file = paste(dat_path, + "Chimp_meta_QC_u50_n189_Rank_110Davi130_2mm_cortex.nii_MeanMacaque2Juna_expansion.csv", + sep = "/")) %>% + mutate(variable = as.factor(1:nrow(.))) %>% # + add_column(Species = as.factor("Chimpanzee(Macaque)")) %>% + rename(factor = variable) + + +## INPUT 3: Human (IXI) - Chimp age def OPNMF csv ## +human_dat <- read_csv(file = paste(dat_path, + 
"IXI_n304_U58_meta_Rank_110Davi130_MNI_3mm_cortex_Juna2MNI_expansion.csv", + sep = "/")) %>% + mutate(variable = as.factor(1:nrow(.))) %>% + add_column(Species = as.factor("Human")) %>% + rename(factor = variable) + +# df with both chimp data +chimp_MB_dat <- dplyr::full_join(chimp_B_dat, chimp_M_dat) + +#### Compare aging and expansion using permutation testing ##### +nperm <- 10e4 +### Should turn perm test into a simple function ### + +## Species Independently ## +## HUMAN ## + +# empty list for human perm correlation results # +perm_res_H <- vector("list", nperm) +set.seed(47) +for (perm_H in 1:nperm) { + + perm_res_H[[perm_H]] <- cor(x=sample(human_dat$statistic), y=human_dat$zscore_def, + method = "pearson") +} + +H_cor <- cor(human_dat$statistic, human_dat$zscore_def) + +perm_res_df_H <- data.frame(perm_R = unlist(perm_res_H)) + +h_p_val <- sum(unlist(perm_res_H) <= H_cor ) / nperm + +ggplot(perm_res_df_H, aes(perm_R)) + + geom_density(color="#CC79A7",fill="#CC79A7") + + geom_vline(xintercept = H_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=-0.75, y=c(3, 2.5), color = "#CC79A7", + label = c(paste("italic(r) == ", round(H_cor, 2), sep=""), + paste("italic(p) == ", sprintf(round(h_p_val,7), fmt = "%#.7f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "IXI_N480_J2M_Davi130_Cortex_R_perm_10e4_IXI304.png", + sep = "/")) + +## CHIMPNAZEE ## + +# Baboon(Haiko) template -> Juna expansion # + +perm_res_C_B <- vector("list", nperm) +set.seed(47) +for (perm_C in 1:nperm) { + + perm_res_C_B[[perm_C]] <- cor(x=chimp_B_dat$statistic, y=sample(chimp_B_dat$zscore_def), + method = "pearson") +} +# correlation between aging and baboon - chimp expansion of across OPNMF factors # +C_B_cor <- cor(chimp_B_dat$statistic, chimp_B_dat$zscore_def) +# mnake a df fr easier ploting # +perm_res_df_C_B <- data.frame(perm_R = unlist(perm_res_C_B)) +# gather p-value +C_B_pval <- sum(unlist(perm_res_C_B) >= C_B_cor ) / nperm + +ggplot(perm_res_df_C_B, aes(perm_R)) + + geom_density(color= "#E1BE6A", fill= "#E1BE6A") + + geom_vline(xintercept = C_B_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=0.75, y=c(3, 2.5), color = "#E1BE6A", + label = c(paste("italic(r) == ", round(C_B_cor, 3), sep=""), + paste("italic(p) == ", sprintf(round(C_B_pval,2), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "Chimp_N189_H2J_davi130_Cortex_R_perm_10e4.png", + sep = "/")) + +# Macaque(MeanMacaque) template -> Juna expansion # + +perm_res_C_M <- vector("list", nperm) +set.seed(47) +for (perm_C in 1:nperm) { + + perm_res_C_M[[perm_C]] <- cor(x=chimp_M_dat$statistic, y=sample(chimp_M_dat$zscore_def), + method = "pearson") +} +# correlation between aging and baboon - chimp expansion of across OPNMF factors # +C_M_cor <- cor(chimp_M_dat$statistic, chimp_M_dat$zscore_def) +# mnake a df fr easier ploting # +perm_res_df_C_M <- data.frame(perm_R = unlist(perm_res_C_M)) +# gather p-value +C_M_pval <- sum(unlist(perm_res_C_M) >= C_M_cor ) / nperm + +ggplot(perm_res_df_C_M, aes(perm_R)) + + geom_density(color= "#56B4E9", fill= "#56B4E9") + + geom_vline(xintercept = C_M_cor, lwd=2, lty=2, color = "grey") + + scale_x_continuous(limits = c(-1,1), breaks = seq(-1,1,0.5)) + + 
labs(x = "Pearson's R", + y = "Count") + + theme_classic(base_size = 20) + + annotate("text", x=0.75, y=c(3, 2.5), color = "#56B4E9", + label = c(paste("italic(r) == ", round(C_M_cor, 2), sep=""), + paste("italic(p) == ", sprintf(round(C_M_pval,2), fmt = "%#.4f"), + sep = "")), + size=8, parse=TRUE) + +ggsave(filename = paste(dat_path, + "Chimp_N189_MM2J_davi130_Cortex_R_perm_10e4.png", + sep = "/")) + +#### Plot the comparison of aging and expansion #### + +# HUMAN PLOT # +#labels_select <- c(5, 5+17, 3, 3+17, 4, 4+17, 16, 16+17, 15, 15+17, +# 8, 8+17, 13, 13+17, 17, 17+17) +# create a col for labels in plot # +#human_dat$labels_sel <- as.character(human_dat$factor) + +#human_dat$labels_sel[-labels_select] <- "" + + +ggplot(human_dat, aes(x = statistic, y = zscore_def, color=Species)) + + geom_point(size = 4.5) + + scale_x_continuous(limits = c(-16, 3), breaks = seq(-16, 2, 2)) + + scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, 1)) + + geom_smooth(aes(fill = Species),formula = y ~ x, method = "rlm", size = 1.5, alpha = 0.3) + + #geom_text_repel(size = 10, box.padding = 3, nudge_x = 0.5) + + theme_bw() + + scale_fill_manual(values = "#CC79A7") + + scale_color_manual(values = "#CC79A7") + + labs(x = "Age - Gray Matter model (t-statistic)", + y = "Cross-species Expansion (Z-scale)") + + theme_classic(base_size = 25) + + theme(legend.position = "none") + + +ggsave(filename = paste(dat_path, + "IXI_n480_J2M_davi130_Cortex_exp_age_IXI304.png", + sep = "/")) + +# Chimpanzee aging compared to both macaque and baboon expansion plot # + +# create a col for labels in plot # +#chimp_MB_dat$labels_sel <- as.character(chimp_MB_dat$factor) + +#chimp_MB_dat$labels_sel[-labels_select] <- "" + +ggplot(chimp_MB_dat, aes(x = statistic, y = zscore_def, + color=Species)) + + geom_point(size = 4.5) + + scale_x_continuous(limits = c(-8, 2), breaks = seq(-8, 2, 1)) + + scale_y_continuous(limits = c(-3, 3), breaks = seq(-3, 3, 1)) + + geom_smooth(aes(fill = Species),formula = y ~ x, method = "rlm", size = 1.5, alpha = 0.3) + + #geom_text_repel(size = 10, box.padding = 1) + + theme_bw() + + scale_fill_manual(values = c("#E1BE6A", "#56B4E9")) + + scale_color_manual(values = c("#E1BE6A", "#56B4E9")) + + labs(x = "Age - Gray Matter model (t-statistic)", + y = "Cross-species Expansion (Z-scale)") + + theme_classic(base_size = 25) + + theme(legend.position = "none") + +# save plot # +ggsave(filename = paste(dat_path, + "Chimp_n189_H2J_M2J_davi130_Cortex_exp_age.png", + sep = "/")) diff --git a/04_Age_Expansion_Analysis/README.md b/04_Age_Expansion_Analysis/README.md new file mode 100644 index 0000000..8f8f9ea --- /dev/null +++ b/04_Age_Expansion_Analysis/README.md @@ -0,0 +1,6 @@ +# Aging and Expansion Analyses + +These scripts are separated into 2 parts: first creating the aging and expansion data and then secondly running the various comparisons presented in the manuscript. + +All the results from these analyses for the manuscript are availble to download [here](https://zenodo.org/record/7116203#.YzLvCfexWV4). 
+ diff --git a/04_Age_Expansion_Analysis/age_def_analyses.R b/04_Age_Expansion_Analysis/age_def_analyses.R new file mode 100644 index 0000000..9260173 --- /dev/null +++ b/04_Age_Expansion_Analysis/age_def_analyses.R @@ -0,0 +1,256 @@ +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly=TRUE) +### Analysis of interspecies deformation and aging utilising OPNMF structural +### covariance regions +### 5 INPUTS: +### ARG_1 - Working directory (wd) where relative paths can be used to find files +### ARG_2 - Cohort investigated +### ARG_3 - Parcellation Image used for analyses +### ARG_4 - Deformation field (Inter-species) Nifti relaltive path from wd +### ARG_5 - name of phenotype meta data with subj names, Age, Sex, Scanner, TIV + +## arg1 working dir ## +wd <- args[1] # e.g. "~/projects/chimp_human_opnmf/" +setwd(wd) + +# load in libraries needed for script # +# need to install pacman if not already installed +if (!require("pacman")) install.packages("pacman") +pacman::p_load(oro.nifti, neurobase, fs, ggplot2, dplyr, tidyr, reshape2, readr, + broom, tibble, MASS, sfsmisc) +### A utility script needs to be in wd/code which contains helper functions +# source the utility script for helper functions # +source(file = paste(path_wd(), "code", "OPNMF_util.R", sep = "/")) + +### 6 OUTPUTS: +### all output files from script will be written in a created wd/outputs dir +output_path <- paste(path_wd(), "outputs", sep = "/") # output dir +# from fs creates dir but ignores if already exists # +dir_create(path = output_path) +### OUTPUT 1 - Meta data of sample with comp mean grey matter vol (csv) +### OUTPUT 2 - output statistics from component wise age regression (csv) +### OUTPUT 3 - T-statistic from age reg projected onto brain (nifti) +### OUTPUT 4 - ONLY significant FWE T-statistic from age reg projected onto brain (nifti) +### OUTPUT 5 - Average deformation per parcellation component image (nifti) +### OUTPUT 6 - deformation & T-statistic data fro post-hoc comparison (csv) +### OUTPUT 7 - Z scored parcellation expanion map (nifti) + + +#### Part 1: Read in input files for script #### + +## Cohort Name ## +cohort <- args[2] + +# parcellation for masking and analysis # +## arg2 input parcellation ## +parc_fil <- args[3] + +parc_path <- paste(path_wd(), "data", cohort, "opnmf_parc", + parc_fil, sep = "/") + +parc_name <- paste(sub(pattern = ".nii.gz", replacement = "\\1", + basename(parc_path)), sep = "") + +print(paste0("Read in parcellation image ", parc_path)) +opnmf_parc <- readnii(parc_fil) +print("DONE!") + +# deformation image # +## arg 3 input deformation field ## +def_name <- args[4] # Name of cross-species expansion map +# e.g. JunaChimp_expansionMNI_3mm.nii.gz# +def_path <- paste(path_wd(), "data", "expansion_maps", + def_name, sep = "/") +print(paste0("Read in Deformation image ", def_name)) +def_img <- readnii(def_path) +print("DONE!") + +# dir containing the sample for analysis - IXI, chimp, or eNKI # +## preprocessed images need to be put int /wd/data/$cohort/mwp1 +GM_img_dir <- paste(path_wd(), "data", cohort, "mwp1", sep = "/") + +# read in meta data from sample e.g. age, sex, subj names # +## arg 5 .csv file containing meta data ## +meta_dat_name <- args[5] # e.g. 
IXI_n304_U58_meta.csv +meta_dat_path <- paste(path_wd(), "data", cohort, + meta_dat_name, sep = "/") + +print(paste0("Read in Meta Data csv ", meta_dat_name)) +GM_img_meta <- read_csv(file = meta_dat_path) +print("DONE!") + +#### Part 2: Parcellation AVG GMV of sample #### +# Number of components or parcels in parcellation # +num_comps <- length(unique(c(img_data(opnmf_parc))))-1 +# convert characters to factros for easier handling # +GM_img_meta <- mutate(GM_img_meta, across(where(is.character),as.factor)) + +comp_GM_mat <- matrix(data = NA, nrow = nrow(GM_img_meta), # No. Subjects + ncol = num_comps) # No. Comps +print("starting GM parcel-wise extraction") + +for (subj in 1:nrow(comp_GM_mat)) { + # mwp1 nifti # + mwp1_img <- readnii(paste(GM_img_dir, + paste("mwp1", + c(as.character(GM_img_meta$Subject[subj])), sep = ""), + sep = "/")) + comp_GM_mat[subj,] <- parcel_avg(parc=opnmf_parc, img=mwp1_img, img_vol=FALSE) +} +print("Extraction finished!") +# create a data frame to write out as csv file # +comp_df <- data.frame(GM_img_meta, Comp = comp_GM_mat) +# name of output csv file # +out_csv_name <- paste(sub(pattern = ".csv", replacement = "\\1", + basename(meta_dat_name)), "_Rank_", as.character(num_comps), + ".csv", sep = "") + +## OUTPUT 1: meta data and mean GMV for each component +print(paste0("Write parcel-wise GM data csv ", out_csv_name, + " here: ", output_path)) +write.csv(comp_df, row.names = FALSE , + file = paste(output_path, out_csv_name, sep = "/")) +print("DONE!") + +#### Part 3: parcellation component GMV age regression #### + +# select variables of interest and conduct a linear model for each comp # +print("Conduct aging regression model") +lm_Comp_age <- dplyr::select(comp_df, Age, Sex, Scanner, TIV, contains("Comp.")) %>% + melt(., id.vars = c("Age", "Sex", "Scanner", "TIV"), na.rm = TRUE) %>% + group_by(variable) %>% + do(tidy(lm(value ~ Age + TIV + Sex + Scanner, data=.))) +print("DONE!") + +# extract only age effect # +Tstats_comp_age <- filter(lm_Comp_age, term == "Age") + +# conduct post-hoc multiple comparison correction # +Tstats_comp_age$FWE <- p.adjust(Tstats_comp_age$p.value, method = "holm") +Tstats_comp_age$BONF <- p.adjust(Tstats_comp_age$p.value, method = "bonferroni") + +# likely a better way to turn non-sig (<= 0.05) folllowing multiple comparison +# correction to zero and the rest to values of statistic column than this +# two step indexing but this still works +# index sig values +sig_idx <- Tstats_comp_age$FWE <= 0.05 +# first create zero column to replace only sig values +Tstats_comp_age$stat_sig <- 0.00001 # small value for better surf projection +# replace sig values with statistic col values +Tstats_comp_age$stat_sig[sig_idx] <- Tstats_comp_age$statistic[sig_idx] + +# name of output t-stat csv file # +Tstat_name <- paste(parc_name, "_Age_Tstats_n", nrow(comp_df), ".csv", sep = "") + +## OUTPUT 2: Component wise age regression statistics ## +print(paste0("Write age regression model csv file ", Tstat_name, + " here: ", output_path)) +write.csv(Tstats_comp_age, row.names = FALSE, + file = paste(output_path, Tstat_name, sep = "/")) +print("DONE!") + +# write out T stat parc brain nifti # +age_t_stat <- abs(c(Tstats_comp_age$statistic)) # make positive for easier plotting +age_t_stat_sig <- abs(c(Tstats_comp_age$stat_sig)) +# parc volume vector # +parc_vol <- c(img_data(opnmf_parc)) + +# extra volume used for manipulation and writing # +out_vol <- parc_vol +out_vol_sig <- parc_vol + +# change value of parcellation to age regression T-statistic # 
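+# (Illustrative note: assuming the parcel labels are the consecutive integers
+# 1..K, the loop below could equivalently be written in one vectorised step,
+#   out_vol[parc_vol > 0] <- age_t_stat[parc_vol[parc_vol > 0]]
+# the explicit loop is kept because it matches the style of the rest of the
+# script.)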
+print("Insert age effect to appropriate OPNMF factor") +for (i in 1:(length(unique(parc_vol))-1)) { + + comp_ind <- parc_vol == i # index comp number + out_vol[comp_ind] <- age_t_stat[i] # change comp number in vol vector to t-stat + out_vol_sig[comp_ind] <- age_t_stat_sig[i] # change comp number in vol vector to t-stat sig +} +print("DONE!") +# output dir & output image name # +Tstat_img_name <- paste(parc_name, "_Age_Tstats_n", nrow(comp_df), sep = "") +Tstat_img_sig_name <- paste(parc_name, "_Age_Tstats_FWE_n", nrow(comp_df), sep = "") + +## OUTPUT 3 & 4: T-statistic brain projection of parcellation Sig and total ## +print(paste0("Write age effect Tstat image ", Tstat_img_name, " & ", + Tstat_img_sig_name, " here: ", output_path)) + +vol_2_img(vol_vec = out_vol, img_info = opnmf_parc, + img_name = Tstat_img_name, location = output_path) +vol_2_img(vol_vec = out_vol_sig, img_info = opnmf_parc, + img_name = Tstat_img_sig_name, location = output_path) + +print("DONE!") + +#### Part 4: Deformation GM Aging decline correlation #### + +# create the def avg parcellation vol data to write out # + +parc_def <- parcel_avg(parc = opnmf_parc, img = def_img, img_vol = TRUE) + +# write out def parc # +## Output 5: Average deformation parcellation image ## +def_parc_name <- paste(parc_name, sub(pattern = ".nii.gz", replacement = "\\1", + basename(def_name)), sep = "_") + +print(paste0("Write OPNMF deformation image ", def_parc_name, + " here: ", output_path)) +vol_2_img(vol_vec = parc_def, img_info = opnmf_parc, img_name = def_parc_name, + location = output_path) +print("DONE!") + +# vector of deformation avg in each parcel # + +def_dat <- parcel_avg(parc = opnmf_parc, img = def_img, img_vol = FALSE) + +print("Create csv file with aging and deformaiton data") +# df with T-stat and def for each parcel & zscore for easier plotting # +age_def_df <- dplyr::select(Tstats_comp_age, variable, statistic) %>% + #mutate(statistic = abs(statistic)) %>% + add_column(., def_dat) %>% + as_tibble() %>% # must add or zscore cols are NA's not sure why?? 
# + mutate( + zscore_def = (def_dat - mean(def_dat)) / sd(def_dat, na.rm = T), + zscore_age = (statistic - mean(statistic)) / sd(statistic) + ) +print("DONE!") + +### OUTPUT 6 - def - age table for post-hoc analysis +out_age_def_name <- paste(sub(pattern = ".csv", replacement = "\\1", + basename(meta_dat_name)), "_Rank_", + as.character(num_comps), def_parc_name, + ".csv", sep = "") +print(paste0("Write aging and deformation csv file ", out_age_def_name, + " here: ", output_path)) +write.csv(age_def_df, row.names = FALSE, + file = paste(output_path, out_age_def_name, sep="/")) +print("DONE!") + +# write a deformation Zscore image # + +# extra volume used for manipulation and writing # +out_vol_def <- parc_vol + +# change value of parcellation to age regression T-statistic # +print("Insert Zscore deformation to appropriate OPNMF factor") +for (j in 1:(length(unique(parc_vol))-1)) { + + comp_ind <- parc_vol == j # index comp number + out_vol_def[comp_ind] <- age_def_df$zscore_def[j] # change comp number in vol vector to t-stat + +} +print("DONE!") + +## Output 7: Average deformation Zscore parcellation image ## +def_parc_name_Z <- paste(parc_name, sub(pattern = ".nii.gz", replacement = "\\1", + basename(def_name)), "Zscore", sep = "_") + +print(paste0("Write OPNMF Zscore deformation image ", def_parc_name_Z, + " here: ", output_path)) +vol_2_img(vol_vec = out_vol_def, img_info = opnmf_parc, img_name = def_parc_name_Z, + location = output_path) +print("DONE!") + +# print age and def tibble data to check if correct or makes sense # +age_def_df diff --git a/Utilities/Comp_Num_match.R b/Utilities/Comp_Num_match.R new file mode 100644 index 0000000..f9f11d4 --- /dev/null +++ b/Utilities/Comp_Num_match.R @@ -0,0 +1,112 @@ +# match OPNMF component numbers across two parcellations +# for easier plotting # +wd <- "~/projects/chimp_human_opnmf/" # Working Directory +setwd(wd) +# use neuroconductor to install neurobase as install.packahes("neurobase") fails +# Neurobase just makes it a bit easier to work with nifti's # +source("https://neuroconductor.org/neurocLite.R") +neuro_install("neurobase") +# Load oro.nifti and neurobase to deal with Nifit Images easily & +# fs for file structure manipulation - mainly using 'path_wd()' +pacman::p_load(oro.nifti, fs, neurobase) # clue - might not be needed + +# Helper script with useful functions # +source(file = paste(path_wd(), "code", "OPNMF_util.R", sep = "/")) + +### INPUTS ### +# INPUT 1: Selected Chimp OPNMF (17) parcellation in MNI space following +# cross-species registration and same resolution as IXI OPNMF data # +# Path to chimp OPNMF solutions # +chimp_path <- paste(path_wd(), "data", "chimp", "opnmf_parc", sep = "/") +# Name of chimp OPNMF parcellation # +chimp_img_name <- "wChimp_cort_n189_TPM03_rank_17_median.nii.gz" +# Read in chimp image # +chimp_img <- readnii(paste(chimp_path, chimp_img_name, sep="/")) + +# Input 2: Selected IXI OPNMF parcellation (17) used to match the chimp OPNMF +# Number based on their spatial location as best as possible # +# path to IXI OPNMF solutions # +IXI_path <- paste(path_wd(), "data", "IXI", "opnmf_parc", sep = "/") +# Name of IXI image # +IXI_img_name <- "IXI_cort_n480_rank_17_median.nii.gz" +# Read in IXI image # +IXI_img <- readnii(paste(IXI_path, IXI_img_name, sep = "/")) + + +#Input 3: Chimp OPNMF parcellation in Juna (Chimp) space to have its numbering +# changed to match IXI 17-parcellation solution # +# Name of chimp OPNMF solution in Juna chimp space # +orig_name <- "Chimp_cort_n189_TPM03_rank_17.nii.gz" 
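+# Note on the relabelling further below: minWeightBipartiteMatching() (sourced
+# from OPNMF_util.R) solves a linear assignment problem via clue::solve_LSAP,
+# so each chimp parcel is matched to the IXI parcel it overlaps most within the
+# shared GM mask. Both parcellations must therefore contain the same number of
+# parcels (here 17); otherwise the function stops with an error.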
+# Read in original chimp image # +chimp_orig <- readnii(paste(chimp_path, orig_name, sep = "/")) + + + +### Prepare images for matching parcellation numbers #### + +# Create a Chimp in MNI space GM mask to be used to mask the IXI image # +# Create GM mask nifti # +c_masked <- mask_img(chimp_img, IXI_img > 0) +# vector of GM mask # +GM_mask <- c(img_data(c_masked > 0)) + +# create GM volume vectors for both species in MNI space # +chimp_vol <- c(img_data(chimp_img)) +IXI_vol <- c(img_data(IXI_img)) +# GM Volume vectors # +chimp_GM <- chimp_vol[GM_mask] +IXI_GM <- IXI_vol[GM_mask] + +# Create volume vector for chimp nifti in Juna space # +chimp_orig_ind <- c(img_data(chimp_orig > 0)) +# GM volume vector # +chimp_orig_GM <- c(img_data(chimp_orig))[chimp_orig_ind] + +#### Match Parcel Numbers & Write out New Nifti's #### + +# use matching function function to match parcel numbers sing GM vectors # +comp_change <- minWeightBipartiteMatching(chimp_GM, IXI_GM) + +# Create chimp vectors in MNI and Juna space to be used to change numbering & +# write out new number matched images # + +# New vector for chimp in Juna space # +chimp_orig_new <- chimp_orig_GM +# New vector for chimp in MNI space # +chimp_GM_ind <- c(img_data(chimp_img > 0)) +chimp_GM_new <- chimp_vol[chimp_GM_ind] +chimp_GM_out <- chimp_GM_new + +# loop over parcel numbers and change to matched numbers with IXI solution # +for (i in 1:length(unique(chimp_GM))) { + + # index for chimp in mni space # + comp_ind_mni <- chimp_GM_new == i + # index for chimp in Juna space # + comp_ind_orig <- chimp_orig_GM == i + # new numbers for the chimp in mni space # + chimp_GM_out[comp_ind_mni] <- comp_change[i] + # new number for chimp in Juna space # + chimp_orig_new[comp_ind_orig] <- comp_change[i] + +} + +#### OUTPUTS #### +# Output 1: chimp OPNMF solution in Juna space parcel number matched to IXI 17 # +# write out new images # +# chimp in Juna space # +out_name_orig <- paste(substr(orig_name, 1, nchar(orig_name)-7), "num_match", + sep = "_") +out_vol_orig <- c(img_data(chimp_orig)) #chimp_vol +out_vol_orig[chimp_orig_ind] <- chimp_orig_new +# Help function for simply writing out new nifit's using vectors # +vol_2_img(vol_vec = out_vol_orig, img_info = chimp_orig, location = chimp_path, + img_name = out_name_orig) +# Output 2: Chimp OPNMF solution in MNI space matched to IXI parecel numbers # +# chimp in MNI space # +out_name_mni <- paste(substr(chimp_img_name, 1, nchar(chimp_img_name)-7), + "num_match", sep = "_") +out_vol_mni <- chimp_vol #chimp_vol MNI space +out_vol_mni[chimp_GM_ind] <- chimp_GM_out +vol_2_img(vol_vec = out_vol_mni, img_info = chimp_img, location = chimp_path, + img_name = out_name_mni) diff --git a/Utilities/OPNMF_util.R b/Utilities/OPNMF_util.R new file mode 100644 index 0000000..17012bf --- /dev/null +++ b/Utilities/OPNMF_util.R @@ -0,0 +1,125 @@ +### utility script for OPNMF post-hoc analysis #### +# Contains functions for commonly used processes # +# S.Vickery 08.2021 # +# FUNCTION 1: Use a parcellation to take the mean of the values of another +# image/s at each parcel/component # + + +parcel_avg <- function(parc, img, img_vol = TRUE) { + require("neurobase") + require("oro.nifti") + ### NEED to add checks that imput is an nifti and that the dimensions are the + ### same !!! + ### Also add proper function commnets ### + # read and vectorise parcellation + #parc_img <- readnii(parc) + ##### NEED to check for NAN's in image #### + parc_vol <- c(img_data(parc)) + + # No. 
of components in the parcellation # + num_comps <- length(unique(parc_vol))-1 + + # read and vectorise image to be masked and mean + #input_img <- readnii(img) + input_img_vol <- c(img_data(img)) + + # output vol to be changed through for loop + out_vol <- parc_vol + + # an empty vector for comp avg values # + out_avg <- rep(NA, num_comps) + + # create mask for each comp and take mean value of mask + for (i in 1:num_comps) { + + comp_ind <- parc_vol == i + if (img_vol) { + out_vol[comp_ind] <- mean(input_img_vol[comp_ind]) + } else { + out_avg[i] <- mean(input_img_vol[comp_ind]) + } + #out_vol[comp_ind] <- mean(input_img_vol[comp_ind]) + #out_avg[i] <- mean(input_img_vol[comp_ind]) + + } + # output a vector where the comp values have been changed to an avg of the + # input image which can then be used to create a nifti image or conduct further + # analysis # + if (img_vol) { + return(out_vol) + } else { + return(out_avg) + } + +} + + +vol_2_img <- function(vol_vec, img_info, img_name, location) { + require("neurobase") + require("oro.nifti") + ### need to check that vol data is the same size as img_info + # create array to be written into a nifti + out_arr <- array(vol_vec, dim = dim(img_info)) + # copy nifti header info for writing out # + vol_nifti <- copyNIfTIHeader(img = img_info, arr = out_arr) + # create different filnames and write out nifti to directory # + fil_name <- img_name + # wriet out nifti to location with filename + writenii(vol_nifti, filename = paste(location, fil_name, sep = "/")) +} + + +# labels from cluster A will be matched on the labels from cluster B +# function that takes two cluster vectors and matches the cluster numbers +# this function was directly copied from +# https://things-about-r.tumblr.com/post/36087795708/matching-clustering-solutions-using-the-hungarian +minWeightBipartiteMatching <- function(clusteringA, clusteringB) { + require(clue) + idsA <- unique(clusteringA) # distinct cluster ids in a + idsB <- unique(clusteringB) # distinct cluster ids in b + nA <- length(clusteringA) # number of instances in a + nB <- length(clusteringB) # number of instances in b + if (length(idsA) != length(idsB) || nA != nB) { + stop("number of cluster or number of instances do not match") + } + + nC <- length(idsA) + tupel <- c(1:nA) + + # computing the distance matrix + assignmentMatrix <- matrix(rep(-1, nC * nC), nrow = nC) + for (i in 1:nC) { + tupelClusterI <- tupel[clusteringA == i] + solRowI <- sapply(1:nC, function(i, clusterIDsB, tupelA_I) { + nA_I <- length(tupelA_I) # number of elements in cluster I + tupelB_I <- tupel[clusterIDsB == i] + nB_I <- length(tupelB_I) + nTupelIntersect <- length(intersect(tupelA_I, tupelB_I)) + return((nA_I - nTupelIntersect) + (nB_I - nTupelIntersect)) + }, clusteringB, tupelClusterI) + assignmentMatrix[i, ] <- solRowI + } + + # optimization + result <- solve_LSAP(assignmentMatrix, maximum = FALSE) + attr(result, "assignmentMatrix") <- assignmentMatrix + return(result) +} + +# compares similarity of clusters between two parcellations # +parc_sim_ARI <- function(parc1, parc2, GM_mask){ + library("oro.nifti") # to read in nifti's + library("neurobase") # read in nifti's and create data + library("aricode") # determine adjusted Rank Index of cluster similarity + + # GM mask for index # + mask_ind <- c(img_data(GM_mask > 0)) + + # masked vector of parcellation values + parc1_dat <- c(img_data(parc1))[mask_ind] + parc2_dat <- c(img_data(parc2))[mask_ind] + + sim <- ARI(parc1_dat, parc2_dat) + return(sim) +} + diff --git 
a/Utilities/README.md b/Utilities/README.md new file mode 100644 index 0000000..9eba43c --- /dev/null +++ b/Utilities/README.md @@ -0,0 +1,11 @@ +# Helper and Utility Scripts + +The ```OPNMF_util.R``` has functions needed to simply create Nifti images, average over parcels, conduct ARI, and match parcellation numbers. + +```chimp_ixi_age_sex_plot.R``` creates Figure 1B for the manuscript + +```brewer_PrGn.clut``` is an LUT file for ```mricroGL``` and ```surfIce``` to create the brewer color scale for the Z-scored Expansion maps. + +```comp_num_match.R``` matches the parcel numbers from the selected chimpanzee 17-factor solution to the IXI 17-factor solution. + +```eNKI_meta_create.R``` establishes the lifespan sample used for teh replication analysis of the human aging - expansion relationship. diff --git a/Utilities/brewer_PrGn.clut b/Utilities/brewer_PrGn.clut new file mode 100644 index 0000000..9d9b706 --- /dev/null +++ b/Utilities/brewer_PrGn.clut @@ -0,0 +1,31 @@ +[STR] +comment=Brewer 10 class PrGn (https://colorbrewer2.org/) by Sam Vickery s.vickery18@gamil.com +[FLT] +min=0 +max=0 +[INT] +numnodes=10 +[BYT] +nodeintensity0=0 +nodeintensity1=27 +nodeintensity2=54 +nodeintensity3=81 +nodeintensity4=108 +nodeintensity5=135 +nodeintensity6=182 +nodeintensity7=209 +nodeintensity8=236 +nodeintensity9=255 + +[RGBA255] +nodergba0=64|0|75|0 +nodergba1=118|42|131|14 +nodergba2=153|112|171|28 +nodergba3=194|165|207|42 +nodergba4=231|212|232|58 +nodergba5=217|240|211|72 +nodergba6=166|219|160|86 +nodergba7=90|174|97|100 +nodergba8=27|120|55|114 +nodergba9=0|68|27|128 + diff --git a/Utilities/chimp_ixi_age_sex_plot.R b/Utilities/chimp_ixi_age_sex_plot.R new file mode 100644 index 0000000..1929bf1 --- /dev/null +++ b/Utilities/chimp_ixi_age_sex_plot.R @@ -0,0 +1,66 @@ +# Create violin plots for the chimp and IXI whole sample meta data # +# to show age, sex, & scanner distribution # + +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) + +# load in libraries needed for script # +# need to install pacman if not already installed +if (!require("pacman")) install.packages("pacman") +pacman::p_load(fs, ggplot2, dplyr, tidyr, readr, tibble, stringr) + + +#### Load meta data from csv files for both samples #### + +### INPUT 1: Chimpanzee meta data csv ### +# chimp csv meta data # +chimp_path <- paste(path_wd(), "data", "chimp", sep = "/") +chimp_meta <- read_csv(file = paste(chimp_path, + "Chimp_meta_QC_u50_n189.csv", + sep = "/")) +### INPUT 2: IXI meta data csv ### +# IXI csv meta data # +IXI_path <- paste(path_wd(), "data", "IXI", sep = "/") +IXI_meta <- read_csv(file = paste(IXI_path, + "IXI_TIV_AGE.csv", sep = "/")) +# clean IXI meta data by removing NaN's, IOP sample, irrelevant data, +# subject over 75 y/o to get the n=480 sample used for paper # + +IXI_clean <- IXI_meta %>% + na.omit() %>% + select(Site, Subject, TIV, GM, WM, CSF, WMH, IQR, + AGE, `SEX_ID (1=m, 2=f)`) %>% + filter(Site %in% c("GUYS", "HH")) %>% + filter(AGE < 75) %>% + mutate(Site = str_replace_all(Site, c("GUYS" = "1.5T", "HH" = "3T"))) %>% + rename(Scanner = Site, Age = AGE, Sex = `SEX_ID (1=m, 2=f)`) %>% + mutate(Sex = str_replace_all(Sex, c("1" = "Male", "2" = "Female"))) + +### OUTPUT 1: clean IXI meta data (N=480) csv file ### +write_csv(IXI_clean, file = paste(IXI_path, "IXI_n480__U75_meta.csv", + sep = "/")) + +### join df for easier plotting ### + +# select columns that are needed for plotting & add species column # +chimp_join <- chimp_meta %>% + select(Subject, Age, Sex, Scanner) %>% + mutate(Species = "Chimpanzee") 
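+# quick illustrative sanity check of the chimpanzee sample before joining #
+print(summary(chimp_join$Age))
+print(table(chimp_join$Sex, chimp_join$Scanner))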
+IXI_join <- IXI_clean %>% + select(Subject, Age, Sex, Scanner) %>% + mutate(Species = "Human") + +# combine samples # +total_meta <- rbind(chimp_join, IXI_join) + +ggplot(data = total_meta, aes(x=Species, y=Age)) + + geom_violin(aes(fill=Species), trim = FALSE) + + geom_jitter(aes(shape=Scanner, color=Sex), width = 0.2, size = 2.5) + + scale_fill_brewer(palette = "Pastel2") + + scale_color_manual(values = c("#d01c8b", "#0571b0")) + + scale_shape_manual(values = c(17, 19)) + + theme_classic(base_size = 15) +### OUTPUT 2: Chimp & Human meta data violin plot ### +ggsave(filename = paste(path_wd(), "outputs", + "chimp_IXI_meta_plot.png", sep = "/"), dpi = 400) + diff --git a/Utilities/eNKI_meta_create.R b/Utilities/eNKI_meta_create.R new file mode 100644 index 0000000..8e89513 --- /dev/null +++ b/Utilities/eNKI_meta_create.R @@ -0,0 +1,58 @@ +# create meta data csv fie for eNKI sample to conduct age_def_compare # + +wd <- "~/projects/chimp_human_opnmf/" +setwd(wd) + +# load in libraries needed for script # +# need to install pacman if not already installed +if (!require("pacman")) install.packages("pacman") +pacman::p_load(fs, ggplot2, dplyr, tidyr, readr, tibble) + +# dir with eNKI data inclusding meta data csv files # +meta_dir <- paste(path_wd(), "data", "eNKI", sep = "/") + +# csv file with meta data from complete eNKI sample # +eNKI_meta_total <- read_csv(file = paste(meta_dir, "eNKI_meta.csv", sep = "/")) %>% + filter(isPatient %in% 0) %>% # remove patients + rename(Subject = participant_id, Sex = sex, Age = age) %>% # col name Subject to match other meta data files + dplyr::select(Subject, Sex, Age) %>% + distinct(Subject, .keep_all = TRUE) %>% # remove duplicates of subjects + na.omit() # remove rows with any missing data + +# csv file with meta data from CAT12 preprocessed images and Juelich atlas ROI's +# this represents the images we have preprocessed to be used # +eNKI_meta_CAT <- read_csv(file = paste(meta_dir, + "eNKI_cat12.8_1mm_rois_julichbrain.csv", sep = "/")) %>% + dplyr::select(1:9) %>% # remove columns for roi atlas data + mutate(Session = as.factor(Session)) %>% + rename(Subject = SubjectID) %>% # col name Subject to match other meta data files + distinct(Subject, .keep_all = TRUE) %>% # remove multiple scans + na.omit() # remove rows with any missing data + +# outliers for QC measures to remove poor quality images # +poor_NCR_2 <- mean(eNKI_meta_CAT$NCR) + (2 * sd(eNKI_meta_CAT$NCR)) +poor_IQR_2 <- mean(eNKI_meta_CAT$IQR) + (2 * sd(eNKI_meta_CAT$IQR)) +poor_ICR_2 <- mean(eNKI_meta_CAT$ICR) + (2 * sd(eNKI_meta_CAT$ICR)) + +# filter out poor images based on QC outliers # +eNKI_meta_CAT_QC <- eNKI_meta_CAT %>% + filter(NCR <= poor_NCR_2) %>% + filter(IQR <= poor_IQR_2) %>% + filter(ICR <= poor_ICR_2) + + +# join with meta data csv to add age of subjects and remove young people # +# lower & upper age limit for filtering # +# lifespan sample # +age_min <- 0 +age_max <- 90 +eNKI_sample_total <- inner_join(eNKI_meta_CAT_QC, eNKI_meta_total, by = "Subject") %>% + filter(Age >= age_min & Age <= age_max) # keep only adults + +write_csv(eNKI_sample_total, file = paste(meta_dir, + paste0("eNKI_meta_QC_n", + nrow(eNKI_sample_total), + "_min", age_min, "_max", age_max, + ".csv"), + sep = "/")) +
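+
+# quick illustrative check of the replication sample written out above #
+print(nrow(eNKI_sample_total))
+print(summary(eNKI_sample_total$Age))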