Pin lighteval
lewtun committed Feb 20, 2025
1 parent 77b6638 · commit 345a8dc
Showing 3 changed files with 7 additions and 7 deletions.
README.md (4 changes: 2 additions & 2 deletions)

@@ -64,7 +64,7 @@ uv pip install setuptools && uv pip install flash-attn --no-build-isolation
This will also install PyTorch `v2.5.1` and it is **very important** to use this version since the vLLM binaries are compiled for it. You can then install the remaining dependencies for your specific use case via `pip install -e .[LIST OF MODES]`. For most contributors, we recommend:

```shell
-uv pip install -e ".[dev]"
+GIT_LFS_SKIP_SMUDGE=1 uv pip install -e ".[dev]"
```

Next, log into your Hugging Face and Weights and Biases accounts as follows:
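Note on the hunk above: `GIT_LFS_SKIP_SMUDGE=1` tells git-lfs to skip downloading LFS-tracked artifacts while pip/uv clones git dependencies during the editable install, which matters now that `lighteval` is pinned to a specific git commit in `setup.py` (see below). A minimal sketch of the resulting install flow, with the environment name and Python version as illustrative assumptions:

```shell
# Minimal install sketch, assuming a fresh uv virtual environment
# (environment name and Python version are assumptions, not from this commit):
uv venv openr1 --python 3.11 && source openr1/bin/activate
uv pip install setuptools && uv pip install flash-attn --no-build-isolation
# Skip downloading git-lfs artifacts while pip/uv clones git dependencies:
GIT_LFS_SKIP_SMUDGE=1 uv pip install -e ".[dev]"
```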
@@ -400,7 +400,7 @@ To reproduce these results use the following command:
```shell
NUM_GPUS=1 # Set to 8 for 32B and 70B models, or data_parallel_size=8 with the smaller models for speed
MODEL=deepseek-ai/{model_name}
MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=32768,gpu_memory_utilization=0.8,tensor_parallel_size=$NUM_GPUS,generation_parameters={temperature:0.6,top_p:0.95}"
MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=32768,gpu_memory_utilization=0.8,data_parallel_size=$NUM_GPUS,generation_parameters={temperature:0.6,top_p:0.95}"
OUTPUT_DIR=data/evals/$MODEL

lighteval vllm $MODEL_ARGS "extended|lcb:codegeneration|0|0" \
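This hunk swaps `tensor_parallel_size` for `data_parallel_size` in the reproduction command. Tensor parallelism shards a single copy of the model across the GPUs (needed when the model does not fit on one device), while data parallelism runs one full replica per GPU and splits the prompts among them, which is faster for the smaller models (as the `NUM_GPUS` comment above notes). A hedged sketch of both configurations, reusing the arguments from this hunk; the model name is an illustrative assumption:

```shell
MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-7B  # example model, assumed
NUM_GPUS=8

# Large model: shard one copy across all 8 GPUs.
MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=32768,gpu_memory_utilization=0.8,tensor_parallel_size=$NUM_GPUS,generation_parameters={temperature:0.6,top_p:0.95}"

# Small model: run 8 independent replicas, each over a slice of the prompts.
MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=32768,gpu_memory_utilization=0.8,data_parallel_size=$NUM_GPUS,generation_parameters={temperature:0.6,top_p:0.95}"
```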
setup.py (2 changes: 1 addition & 1 deletion)

@@ -56,7 +56,7 @@
"langdetect", # Needed for LightEval's extended tasks
"latex2sympy2_extended>=1.0.6",
"liger_kernel==0.5.2",
"lighteval @ git+https://github.com/huggingface/lighteval.git@main",
"lighteval @ git+https://github.com/huggingface/lighteval.git@e89a9876ada1da511df0708955a63012739222b5",
"math-verify==0.5.2", # Used for math verification in grpo
"packaging>=23.0",
"parameterized>=0.9.0",
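Pinning the dependency to a full commit SHA instead of `@main` makes installs reproducible: pip and uv resolve such PEP 508 direct references to exactly that commit. The pinned requirement can also be installed on its own:

```shell
# Same spec as the setup.py line above, installed directly:
uv pip install "lighteval @ git+https://github.com/huggingface/lighteval.git@e89a9876ada1da511df0708955a63012739222b5"
```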
slurm/evaluate.slurm (8 changes: 4 additions & 4 deletions)

@@ -29,9 +29,9 @@ NUM_GPUS=$(nvidia-smi -L | wc -l)
if [ "$TENSOR_PARALLEL" = "True" ]; then
# use TP to shard model across NUM_GPUS
export VLLM_WORKER_MULTIPROC_METHOD=spawn
MODEL_ARGS="pretrained=$MODEL_ID,revision=$MODEL_REVISION,trust_remote_code=$TRUST_REMOTE_CODE,dtype=bfloat16,tensor_parallel_size=$NUM_GPUS,max_model_length=32768,gpu_memory_utilization=0.8"
MODEL_ARGS="pretrained=$MODEL_ID,revision=$MODEL_REVISION,trust_remote_code=$TRUST_REMOTE_CODE,dtype=bfloat16,tensor_parallel_size=$NUM_GPUS,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens: 32768,temperature: 0}"
else
MODEL_ARGS="pretrained=$MODEL_ID,revision=$MODEL_REVISION,trust_remote_code=$TRUST_REMOTE_CODE,dtype=bfloat16,data_parallel_size=$NUM_GPUS,max_model_length=32768,gpu_memory_utilization=0.8"
MODEL_ARGS="pretrained=$MODEL_ID,revision=$MODEL_REVISION,trust_remote_code=$TRUST_REMOTE_CODE,dtype=bfloat16,data_parallel_size=$NUM_GPUS,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens: 32768,temperature: 0}"
fi

LM_EVAL_REPO_ID="open-r1/open-r1-eval-leaderboard"
@@ -48,14 +48,14 @@ echo "Eval results will be saved to $OUTPUT_DIR"
# Check if "custom" is a substring of TASKS
if [[ $TASKS == *"custom"* ]]; then
echo "Custom task detected. Running custom task evaluation script ..."
lighteval vllm $MODEL_ARGS $TASKS \
lighteval vllm "$MODEL_ARGS" $TASKS \
--custom-tasks "src/open_r1/evaluate.py" \
--use-chat-template \
--output-dir $OUTPUT_DIR \
--save-details \
${7:+--system-prompt "$7"}
else
lighteval vllm $MODEL_ARGS $TASKS \
lighteval vllm "$MODEL_ARGS" $TASKS \
--use-chat-template \
--output-dir $OUTPUT_DIR \
--save-details \
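The quoting added in this hunk follows from the first one: with `generation_parameters={max_new_tokens: 32768,temperature: 0}` (greedy decoding with a large token budget) embedded in `MODEL_ARGS`, the value now contains spaces, so an unquoted `$MODEL_ARGS` would be word-split by the shell into several arguments before reaching `lighteval`. A minimal demonstration of the difference:

```shell
# Word splitting on an args string that contains spaces (sketch):
MODEL_ARGS="pretrained=demo,generation_parameters={max_new_tokens: 32768,temperature: 0}"
printf '<%s>\n' $MODEL_ARGS     # unquoted: splits into three words at the spaces
printf '<%s>\n' "$MODEL_ARGS"   # quoted: passed through as a single argument
```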
