Skip to content

Commit

Permalink
Code for processing TLAE dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
emdupre committed May 9, 2024
1 parent 21a781f commit 26b5317
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 0 deletions.
47 changes: 47 additions & 0 deletions bayes_ca/data/tlae-embeddings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import json
from pathlib import Path

import click
import whisper
import numpy as np
from sentence_transformers import SentenceTransformer


@click.command()
@click.option("--file", required=True, help="Stimulus file to transcribe and embed.")
@click.option("--datadir", required=True, help="Local path to the stimuli files.")
@click.option(
    "--outdir",
    required=True,
    help="Local path to store transcriptions and embeddings.",
)
def main(file, datadir, outdir):
    """
    Transcribe a stimulus file with Whisper and embed its sentences with SBert.

    This script assumes you have access to the copyrighted stimuli
    and that you are running in an environment with SBert (i.e.,
    sentence-transformers) and OpenAI's Whisper installed.

    Two files are written to ``outdir``: the full Whisper result as
    ``whisper-<stem>.json`` and the sentence embeddings as
    ``sbert-whisper-<stem>.json.npy``.

    Parameters
    ----------
    file : str
        Stimulus file to transcribe and generate embedding
    datadir : str
        Local path to the stimuli files
    outdir : str
        Local path to store transcriptions and embeddings
    """
    out_path = Path(outdir)
    # Create the output directory up front so that a missing directory is
    # caught before the slow transcription step rather than after it.
    out_path.mkdir(parents=True, exist_ok=True)

    transcriber = whisper.load_model("medium.en")
    result = transcriber.transcribe(str(Path(datadir, file)))

    # Keep only the final path component: a ``file`` given with
    # sub-directories must still map to a flat name inside ``outdir``.
    whisper_outname = "whisper-" + Path(file).with_suffix(".json").name
    with open(out_path / whisper_outname, "w", encoding="utf-8") as outfile:
        json.dump(result, outfile, indent=4)

    # NOTE: naive sentence split on ". "; sentences ending in "?" or "!"
    # stay attached to the sentence that follows them.
    sentences = result["text"].split(". ")
    encoder = SentenceTransformer("all-mpnet-base-v2")
    embeddings = encoder.encode(sentences)

    # The name ends in ".json", so np.save appends ".npy" to it.
    sbert_outname = out_path / f"sbert-{whisper_outname}"
    np.save(sbert_outname, embeddings, allow_pickle=False)


if __name__ == "__main__":
    main()
32 changes: 32 additions & 0 deletions bayes_ca/data/tlae-fmriprep.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
#SBATCH --job-name=tlae_fMRIPrep
#SBATCH --output=tlae_fmriprep.%j.out
#SBATCH --time=1-00:00
#SBATCH --cpus-per-task=16
#SBATCH --mem-per-cpu=8GB
#SBATCH --array=0-25
#SBATCH -p russpold,owners

# Run fMRIPrep (Singularity image) on one subject per SLURM array task.
# The array index selects an entry from the sorted list of sub-s* folders
# found directly under DATADIR.

# Define directories

DATADIR=$OAK/users/emdupre/think-like-an-expert/ds003233
OUTDIR=$SCRATCH/think-like-an-expert
SIFDIR=$OAK/users/emdupre/think-like-an-expert/
LICENSE=$HOME/submission_scripts

# Begin work section

# Abort with a clear message when not launched as part of a job array;
# otherwise the array lookup below fails with an obscure subscript error.
: "${SLURM_ARRAY_TASK_ID:?this script must be submitted as a SLURM array job}"

# Make sure the host side of the /out bind mount exists before it is mounted.
mkdir -p "${OUTDIR}"

# Read one subject folder name per line (no word splitting or globbing).
mapfile -t subj_list < <(
    find "${DATADIR}" -maxdepth 1 -type d -name 'sub-s*' -printf '%f\n' \
        | sort -n -ts -k2.1
)
sub="${subj_list[$SLURM_ARRAY_TASK_ID]}"

# An index past the end of the list (or an empty/missing DATADIR) yields an
# empty label; stop here instead of launching fMRIPrep without a subject.
if [[ -z "${sub}" ]]; then
    echo "No subject found for array index ${SLURM_ARRAY_TASK_ID} in ${DATADIR}" >&2
    exit 1
fi
echo "SUBJECT_ID:  ${sub}"

# --output-spaces is the documented spelling of the flag; the previous
# --output-space only worked through argparse prefix matching.
singularity run --cleanenv -B "${DATADIR}:/data:ro" \
    -B "${OUTDIR}:/out" \
    -B "${LICENSE}/license.txt:/license/license.txt:ro" \
    "${SIFDIR}/fmriprep-23-2-0.sif" \
    /data /out participant \
    --participant-label "${sub}" \
    --output-spaces fsaverage5 MNI152NLin2009cAsym:res-2 \
    -w /out/workdir \
    --notrack \
    --fs-license-file /license/license.txt

0 comments on commit 26b5317

Please sign in to comment.