
Commit c716dfd

Add Gaudi CI for Sentence Transformers (huggingface#928)
1 parent: 6bc723a · commit: c716dfd

File tree

4 files changed: +171 −1 lines changed

.github/workflows/slow_tests.yml
.github/workflows/slow_tests_gaudi2.yml
tests/ci/sentence_transformers.sh
tests/test_sentence_transformers.py

.github/workflows/slow_tests.yml

+45 −1

@@ -46,7 +46,8 @@ jobs:
             ]
   example-diff:
     name: Test examples differences
-    needs: start-runner # required to start the main job when the runner is ready
+    needs:
+      - start-runner # required to start the main job when the runner is ready
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
     env:
       AWS_REGION: us-west-2
@@ -287,6 +288,48 @@ jobs:
           --ipc=host \
           vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest \
           /bin/bash tests/ci/slow_tests_trl.sh
+  sentence-transformers:
+    name: Test Sentence Transformers integration
+    if: ${{ !cancelled() && (success() || failure()) }}
+    needs:
+      - start-runner
+      - example-diff
+      - deepspeed
+      - multi-card
+      - single-card
+      - albert-xxl-single-card
+      - text-generation
+      - trl # run the job when the previous test jobs are done
+    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
+    env:
+      AWS_REGION: us-west-2
+    steps:
+      - name: Checkout Optimum Habana
+        uses: actions/checkout@v2
+        with:
+          repository: 'huggingface/optimum-habana'
+          path: optimum-habana
+      - name: Checkout Sentence Transformers
+        uses: actions/checkout@v2
+        with:
+          repository: 'UKPLab/sentence-transformers'
+          path: sentence-transformers
+      - name: Pull image
+        run: |
+          docker pull vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest
+      - name: Run tests
+        run: |
+          docker run \
+          -v $PWD:/root/workspace \
+          --workdir=/root/workspace \
+          --runtime=habana \
+          -e HABANA_VISIBLE_DEVICES=all \
+          -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
+          --cap-add=sys_nice \
+          --net=host \
+          --ipc=host \
+          vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest \
+          /bin/bash optimum-habana/tests/ci/sentence_transformers.sh
   stop-runner:
     name: Stop self-hosted EC2 runner
     needs:
@@ -298,6 +341,7 @@ jobs:
       - albert-xxl-single-card
       - text-generation
       - trl
+      - sentence-transformers
     runs-on: ubuntu-22.04
     env:
       AWS_REGION: us-west-2

.github/workflows/slow_tests_gaudi2.yml

+34
@@ -192,3 +192,37 @@ jobs:
           --ipc=host \
           vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest \
           /bin/bash tests/ci/slow_tests_trl.sh
+  sentence-transformers:
+    name: Test Sentence Transformers integration
+    if: ${{ !cancelled() && (success() || failure()) }}
+    needs:
+      - trl
+    runs-on: [self-hosted, linux, x64, gaudi2]
+    steps:
+      - name: Checkout Optimum Habana
+        uses: actions/checkout@v2
+        with:
+          repository: 'huggingface/optimum-habana'
+          path: optimum-habana
+      - name: Checkout Sentence Transformers
+        uses: actions/checkout@v2
+        with:
+          repository: 'UKPLab/sentence-transformers'
+          path: sentence-transformers
+      - name: Pull image
+        run: |
+          docker pull vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest
+      - name: Run tests
+        run: |
+          docker run \
+          -v $PWD:/root/workspace \
+          --workdir=/root/workspace \
+          --runtime=habana \
+          -e HABANA_VISIBLE_DEVICES=all \
+          -e GAUDI2_CI=1 \
+          -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
+          --cap-add=sys_nice \
+          --net=host \
+          --ipc=host \
+          vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest \
+          /bin/bash optimum-habana/tests/ci/sentence_transformers.sh
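Aside from the runner labels, the functional difference from the first-generation workflow above is the extra -e GAUDI2_CI=1 environment variable. A minimal sketch of what that variable controls, mirroring the baseline-selection branch in tests/test_sentence_transformers.py below:

import os

# Mirrors the branch in tests/test_sentence_transformers.py: when the Gaudi2
# workflow passes -e GAUDI2_CI=1 into the container, the tests compare against
# the higher Gaudi2 throughput baselines instead of the Gaudi1 ones.
if os.environ.get("GAUDI2_CI", "0") == "1":
    print("Comparing against Gaudi2 CI baselines")
else:
    print("Comparing against Gaudi1 CI baselines")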

tests/ci/sentence_transformers.sh

+9
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+python -m pip install --upgrade pip
+python -m pip install /root/workspace/optimum-habana[tests]
+cd /root/workspace/sentence-transformers/tests
+python -m pip install ..
+python -m pytest test_compute_embeddings.py test_evaluator.py test_multi_process.py test_pretrained_stsb.py test_util.py
+cd /root/workspace/optimum-habana/tests
+python -m pytest test_sentence_transformers.py
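The script first runs a subset of the upstream Sentence Transformers test suite against the Gaudi-enabled install, then the new optimum-habana throughput tests. As a hypothetical local shortcut (not part of the commit), the throughput tests can be narrowed to a few models with pytest's -k filter; this sketch assumes optimum-habana[tests] is already installed and is run from optimum-habana/tests:

# Hypothetical helper, not part of the commit: run only the MiniLM entries
# of MODELS_TO_TEST, assuming the same environment the CI container sets up.
import sys

import pytest

if __name__ == "__main__":
    sys.exit(pytest.main(["test_sentence_transformers.py", "-k", "MiniLM", "-v"]))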

tests/test_sentence_transformers.py

+83
@@ -0,0 +1,83 @@
+import csv
+import gzip
+import os
+import time
+
+import pytest
+from sentence_transformers import SentenceTransformer, util
+
+from .test_examples import TIME_PERF_FACTOR
+
+
+if os.environ.get("GAUDI2_CI", "0") == "1":
+    # Gaudi2 CI baselines
+    MODELS_TO_TEST = [
+        ("sentence-transformers/all-mpnet-base-v2", 762.5595168883357),
+        ("sentence-transformers/multi-qa-mpnet-base-dot-v1", 545.3360251829846),
+        ("sentence-transformers/all-distilroberta-v1", 958.5097903298335),
+        ("sentence-transformers/all-MiniLM-L12-v2", 3614.2610109716247),
+        ("sentence-transformers/multi-qa-distilbert-cos-v1", 944.6166139694299),
+        ("sentence-transformers/all-MiniLM-L6-v2", 2615.6975354038477),
+        ("sentence-transformers/multi-qa-MiniLM-L6-cos-v1", 1208.3672807492396),
+        ("sentence-transformers/paraphrase-multilingual-mpnet-base-v2", 2392.1654748794062),
+        ("sentence-transformers/paraphrase-albert-small-v2", 3896.1911011860166),
+        ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 3558.0778715789693),
+        ("sentence-transformers/paraphrase-MiniLM-L3-v2", 5734.318427972881),
+        ("sentence-transformers/distiluse-base-multilingual-cased-v1", 3487.3319366004903),
+        ("sentence-transformers/distiluse-base-multilingual-cased-v2", 3807.2486282025716),
+    ]
+else:
+    # Gaudi1 CI baselines
+    MODELS_TO_TEST = [
+        ("sentence-transformers/all-mpnet-base-v2", 164.36556936723508),
+        ("sentence-transformers/multi-qa-mpnet-base-dot-v1", 116.82789535569364),
+        ("sentence-transformers/all-distilroberta-v1", 226.90237421623164),
+        ("sentence-transformers/all-MiniLM-L12-v2", 1252.6261862281467),
+        ("sentence-transformers/multi-qa-distilbert-cos-v1", 216.47035182888888),
+        ("sentence-transformers/all-MiniLM-L6-v2", 1109.160132821451),
+        ("sentence-transformers/multi-qa-MiniLM-L6-cos-v1", 471.14320842607674),
+        ("sentence-transformers/paraphrase-multilingual-mpnet-base-v2", 518.4762252952173),
+        ("sentence-transformers/paraphrase-albert-small-v2", 1139.806075824319),
+        ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 1253.06776127632),
+        ("sentence-transformers/paraphrase-MiniLM-L3-v2", 3029.398417051629),
+        ("sentence-transformers/distiluse-base-multilingual-cased-v1", 947.844857744754),
+        ("sentence-transformers/distiluse-base-multilingual-cased-v2", 947.7317550605878),
+    ]
+
+
+def _test_sentence_transformers(
+    model_name: str,
+    baseline: float,
+):
+    model = SentenceTransformer(model_name)
+
+    nli_dataset_path = "/tmp/datasets/AllNLI.tsv.gz"
+    sentences = set()
+    max_sentences = 10000
+
+    # Download datasets if needed
+    if not os.path.exists(nli_dataset_path):
+        util.http_get("https://sbert.net/datasets/AllNLI.tsv.gz", nli_dataset_path)
+
+    with gzip.open(nli_dataset_path, "rt", encoding="utf8") as fIn:
+        reader = csv.DictReader(fIn, delimiter="\t", quoting=csv.QUOTE_NONE)
+        for row in reader:
+            sentences.add(row["sentence1"])
+            if len(sentences) >= max_sentences:
+                break
+
+    sentences = list(sentences)
+
+    for i in range(2):
+        start_time = time.perf_counter()
+        _ = model.encode(sentences, batch_size=32)
+        end_time = time.perf_counter()
+        diff_time = end_time - start_time
+        measured_throughput = len(sentences) / diff_time
+    # Only assert the last measured throughput as the first iteration is used as a warmup
+    assert measured_throughput >= (2 - TIME_PERF_FACTOR) * baseline
+
+
+@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST)
+def test_compute_embeddings_throughput(model_name: str, baseline: float):
+    _test_sentence_transformers(model_name, baseline)
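Each parametrized test encodes the same 10,000 NLI sentences twice and gates only the second, post-warmup run against the recorded baseline, scaled by TIME_PERF_FACTOR from tests/test_examples.py. A sketch of the arithmetic, assuming a factor of 1.05 (the real value is defined in test_examples.py and may differ):

# Illustration of the assertion above. TIME_PERF_FACTOR = 1.05 is an assumed
# value for this sketch; it yields a 5% slowdown tolerance.
TIME_PERF_FACTOR = 1.05

def passes(measured_throughput: float, baseline: float) -> bool:
    # Pass if throughput reaches at least (2 - TIME_PERF_FACTOR) * baseline.
    return measured_throughput >= (2 - TIME_PERF_FACTOR) * baseline

# With the Gaudi1 baseline for all-MiniLM-L6-v2 from the table above:
print(passes(1100.0, 1109.160132821451))  # True: threshold is ~1053.7
print(passes(1000.0, 1109.160132821451))  # False: more than 5% below baseline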
