Skip to content

Commit

Permalink
data finish, sim metrics condensed
Browse files Browse the repository at this point in the history
  • Loading branch information
nverma1 committed Dec 5, 2022
1 parent 0d38a6d commit cc49db9
Show file tree
Hide file tree
Showing 8 changed files with 11 additions and 37 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Requirements
Setup
-------
- Download third-party packages: `sh third_party/get_third_party.sh`
- Download and build the data: `cd data && sh make_data.sh`

Usage
-------
Expand Down
2 changes: 1 addition & 1 deletion data/make_data.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ for year in 2018 2019; do
gunzip news.$year.en.shuffled.deduped.gz
done

cat news.2018.en.shuffled.deduped.gz news.2019.en.shuffled.deduped.gz news.2020.en.shuffled.deduped.gz > news.2018-2020.en.full
cat news.2018.en.shuffled.deduped news.2019.en.shuffled.deduped news.2020.en.shuffled.deduped > news.2018-2020.en.full

cd ../..

Expand Down
6 changes: 0 additions & 6 deletions local-settings.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
###############################################################################
### Environment setting.
###############################################################################
# start conda env.
source /home/kmarc/anaconda3/etc/profile.d/conda.sh
conda activate /home/kmarc/anaconda3/envs/isovec

# path to my working directory
export DIR=`pwd`
Expand All @@ -16,9 +13,6 @@ export MOSES_SCRIPTS=/home/kmarc/moses/scripts
export VECMAP=$DIR/third_party/vecmap_fork
export WORD2VEC=$DIR/third_party/word2vec/word2vec
export ISOSTUDY_SCRIPTS=`pwd`/third_party/iso_study_fork/scripts
export GRID=CLSP

echo 'TEMPDIR is: ' $TEMPDIR
echo You are running in this environment: $CONDA_PREFIX
echo 'You are running on machine: ' `hostname` ' on the ' $GRID ' grid.'
set -e
21 changes: 0 additions & 21 deletions qsub-sim-metrics.sh

This file was deleted.

10 changes: 5 additions & 5 deletions run-sim-metrics.sh → sim_metrics/run-sim-metrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ do
REF_EMBS=$OUTDIR/en/embs.out
for metric in evs rsim gh
do
sh qsub-sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric $OUTDIR
sh sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric
done
done
done
Expand All @@ -33,7 +33,7 @@ do
REF_EMBS=exps/baseline/10/en/embs.out
for metric in evs rsim gh
do
sh qsub-sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric $OUTDIR
sh sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric
done
done
done
Expand All @@ -55,7 +55,7 @@ do
REF_EMBS=$OUTDIR/en.mapped.sup
for metric in evs rsim gh
do
sh qsub-sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric $OUTDIR
sh sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric
done
done
done
Expand All @@ -74,7 +74,7 @@ do
REF_EMBS=$EXPDIR/test/en.mapped.semisup
for metric in evs rsim gh
do
sh qsub-sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric $OUTDIR
sh sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric
done
done
done
Expand All @@ -95,7 +95,7 @@ do
REF_EMBS=$EXPDIR/mapped/en.mapped.semisup
for metric in evs rsim gh
do
sh qsub-sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric $OUTDIR
sh sim-metrics.sh $lang en $SRC_EMBS $REF_EMBS $metric
done
done
done
Expand Down
2 changes: 1 addition & 1 deletion sim-metrics.sh → sim_metrics/sim-metrics.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

. ./local-settings.sh
. ../local-settings.sh

SRC=$1
TRG=$2
Expand Down
2 changes: 1 addition & 1 deletion third_party/get_third_party.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ cd word2vec && make && cd ..
# Convert vectors to bin format for monolingual eval
git clone https://github.com/marekrei/convertvec.git

# Fetch fairseq's Indic tokenizer script. Use the raw-content URL: the
# github.com/.../blob/... address returns the HTML page, not the Python source.
wget https://raw.githubusercontent.com/facebookresearch/fairseq/main/examples/m2m_100/tokenizers/tokenize_indic.py


4 changes: 2 additions & 2 deletions train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
### This script has been written for use on the JHU CLSP Grid
### -- Kelly Marchisio, Feb 2022.

. ./local-settings-gpu.sh
. ./local-settings.sh

# https://unix.stackexchange.com/questions/129391/passing-named-arguments-to-shell-scripts
# t is the only letter left...

while getopts "a:b:c:d:e:f:g:h:i:j:k:l:m:n:o:p:q:r:s:u:v:w:x:y:z:" opt; do
case $opt in
f) INFILE="$OPTARG"
Expand Down

0 comments on commit cc49db9

Please sign in to comment.