Skip to content

Commit dce9e55

Browse files
yafshar, regisss, vidyasiv, kaixuanliu
authored
Only pass the use_kv_cache True to generator (huggingface#1366)
Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> Co-authored-by: Vidya Galli <vidya.s.galli@intel.com> Co-authored-by: kaixuanliu <kaixuan.liu@intel.com>
1 parent f75fa3b commit dce9e55

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

examples/image-to-text/run_pipeline.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -211,13 +211,15 @@ def main():
211211
)
212212
generate_kwargs = {
213213
"lazy_mode": True,
214-
"use_cache": args.use_kv_cache,
215214
"hpu_graphs": args.use_hpu_graphs,
216215
"max_new_tokens": args.max_new_tokens,
217216
"ignore_eos": args.ignore_eos,
218217
"use_flash_attention": args.use_flash_attention,
219218
"flash_attention_recompute": args.flash_attention_recompute,
220219
}
220+
if args.use_kv_cache:
221+
generate_kwargs["use_cache"] = args.use_kv_cache
222+
221223
if args.use_hpu_graphs:
222224
from habana_frameworks.torch.hpu import wrap_in_hpu_graph
223225

tests/test_image_to_text_example.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@
1414
# Gaudi2 CI baselines
1515
MODELS_TO_TEST = {
1616
"bf16": [
17-
("llava-hf/llava-1.5-7b-hf", 1, 87.2901500056982),
18-
("llava-hf/llava-1.5-13b-hf", 1, 51.04717105443364),
17+
("llava-hf/llava-1.5-7b-hf", 1, 77.98733740859008),
18+
("llava-hf/llava-1.5-13b-hf", 1, 48.54364937033955),
1919
("llava-hf/llava-v1.6-mistral-7b-hf", 1, 33.17984878151546),
2020
("llava-hf/llava-v1.6-vicuna-7b-hf", 1, 35.00608681379742),
2121
("llava-hf/llava-v1.6-vicuna-13b-hf", 1, 23.527610042925),
2222
],
2323
"fp8": [
24-
("llava-hf/llava-1.5-7b-hf", 1, 115.48515989461843),
25-
("llava-hf/llava-1.5-13b-hf", 1, 78.2635142547838),
24+
("llava-hf/llava-1.5-7b-hf", 1, 98.72578382705062),
25+
("llava-hf/llava-1.5-13b-hf", 1, 67.20488222876344),
2626
("llava-hf/llava-v1.6-mistral-7b-hf", 1, 45.011551008367084),
2727
("llava-hf/llava-v1.6-vicuna-7b-hf", 1, 45.18544502949674),
2828
("llava-hf/llava-v1.6-vicuna-13b-hf", 1, 30.9535718774675),

0 commit comments

Comments (0)