From 68bf6ecfcf24d04706fa6a9fbb9a82f760368117 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Mon, 6 Nov 2023 11:41:14 -0800 Subject: [PATCH 1/4] Updates test-t5x.sh to support finetuning+eval code paths --- .github/container/test-t5x.sh | 110 +++++++++++++++++++++++++++++----- 1 file changed, 96 insertions(+), 14 deletions(-) diff --git a/.github/container/test-t5x.sh b/.github/container/test-t5x.sh index 573834b8d..dc48e08e2 100755 --- a/.github/container/test-t5x.sh +++ b/.github/container/test-t5x.sh @@ -7,7 +7,7 @@ print_var() { } usage() { - echo "Test T5X throughput on a fake-data Wikipedia benchmark." + echo "Test T5X throughput on a fake-data Wikipedia benchmark or real glue/mnli." echo "" echo "Usage: $0 [OPTIONS]" echo "" @@ -18,6 +18,7 @@ usage() { echo " -d, --dtype Data type, defaults to bfloat16." echo " --enable-te {0,1} 1 to enable, 0 to disable; defaults to ENABLE_TE in env or 0 if unset" echo " -e, --epochs Number of epochs to run, defaults to 7." + echo " -f, --finetune Uses squad1 finetuning configs (w/ dropout). By default will only use pre-training configs (no dropout)." echo " --multiprocess Enable the multiprocess GPU mode." echo " -o, --output NAME Name for the output folder, a temporary folder will be created if none specified." echo " --seed INT Random seed for deterministim. Defaults to 42." @@ -26,7 +27,7 @@ usage() { exit $1 } -args=$(getopt -o a:b:cd:e:ho:s: --long additional-args:,batch-size:,use-contrib-configs,dtype:,enable-te:,epochs:,help,multiprocess,output:,seed:,steps-per-epoch: -- "$@") +args=$(getopt -o a:b:cd:e:fho:s: --long additional-args:,batch-size:,use-contrib-configs,dtype:,enable-te:,epochs:,finetune,help,multiprocess,output:,seed:,steps-per-epoch: -- "$@") if [[ $? -ne 0 ]]; then exit 1 fi @@ -38,6 +39,7 @@ BATCH_SIZE=0 USE_CONTRIB_CONFIGS=0 DTYPE=bfloat16 EPOCHS=7 +FINETUNE=0 MULTIPROCESS=0 OUTPUT=$(mktemp -d) SEED=42 @@ -71,6 +73,10 @@ while [ : ]; do EPOCHS="$2" shift 2 ;; + -f | --finetune) + FINETUNE=1 + shift 1 + ;; -h | --help) usage 1 ;; @@ -115,6 +121,7 @@ print_var USE_CONTRIB_CONFIGS print_var DTYPE print_var ENABLE_TE print_var EPOCHS +print_var FINETUNE print_var OUTPUT print_var MULTIPROCESS print_var STEPS_PER_EPOCH @@ -158,32 +165,76 @@ seqio.TaskRegistry.add( EOF ## Create GIN file -cat > benchmark.gin < pretrain.gin < finetune.gin < Date: Mon, 6 Nov 2023 11:46:27 -0800 Subject: [PATCH 2/4] T5x: two new test cases for finetuning w/ and w/o TE --- .github/workflows/_test_t5x_rosetta.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/_test_t5x_rosetta.yaml b/.github/workflows/_test_t5x_rosetta.yaml index 58bb562be..50a45a912 100644 --- a/.github/workflows/_test_t5x_rosetta.yaml +++ b/.github/workflows/_test_t5x_rosetta.yaml @@ -179,6 +179,16 @@ jobs: N_NODE: 2 ADDITIONAL_ARGS: "--enable-te 0" EXTRA_GIN_ARGS: "" + - TEST_NAME: "1N2G-ft_te-1" + N_GPU: 2 + N_NODE: 1 + ADDITIONAL_ARGS: "--finetune" + EXTRA_GIN_ARGS: "" + - TEST_NAME: "1N2G-ft_te-0" + N_GPU: 2 + N_NODE: 1 + ADDITIONAL_ARGS: "--enable-te 0 --finetune" + EXTRA_GIN_ARGS: "" fail-fast: false runs-on: ubuntu-22.04 From 0187e4ba6aed1a69e546428bc72b2a33dd9f2697 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Mon, 6 Nov 2023 11:48:00 -0800 Subject: [PATCH 3/4] typo --- .github/container/test-t5x.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/container/test-t5x.sh b/.github/container/test-t5x.sh index dc48e08e2..f8778bece 100755 --- a/.github/container/test-t5x.sh +++ b/.github/container/test-t5x.sh @@ -7,7 +7,7 @@ print_var() { } usage() { - echo "Test T5X throughput on a fake-data Wikipedia benchmark or real glue/mnli." + echo "Test T5X throughput on a fake-data Wikipedia benchmark or real squad." echo "" echo "Usage: $0 [OPTIONS]" echo "" From e5ca9b292d714129b0992d92f5951871f8350e25 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Tue, 12 Dec 2023 10:21:37 -0800 Subject: [PATCH 4/4] casing of squad --- .github/container/test-t5x.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/container/test-t5x.sh b/.github/container/test-t5x.sh index f8778bece..53b451cf5 100755 --- a/.github/container/test-t5x.sh +++ b/.github/container/test-t5x.sh @@ -7,7 +7,7 @@ print_var() { } usage() { - echo "Test T5X throughput on a fake-data Wikipedia benchmark or real squad." + echo "Test T5X throughput on a fake-data Wikipedia benchmark or real SQuAD." echo "" echo "Usage: $0 [OPTIONS]" echo ""