Skip to content

Commit

Permalink
Add AIME25
Browse files Browse the repository at this point in the history
  • Loading branch information
lewtun committed Feb 10, 2025
1 parent db19392 commit 1798f0e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
15 changes: 15 additions & 0 deletions src/open_r1/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,20 @@ def gpqa_prompt_fn(line, task_name: str = None):
metric=[expr_gold_metric],
version=1,
)
# Task definition for the AIME25 benchmark.
aime25 = LightevalTaskConfig(
    # Identity: the task is named "aime25" and belongs to the "custom" suite.
    name="aime25",
    suite=["custom"],
    version=1,
    # Data source: "train" is the only split listed, and it is the one evaluated.
    hf_repo="TIGER-Lab/AIME25",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    # No few-shot split or selection strategy is configured for this task.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation size and scoring.
    prompt_function=aime_prompt_fn,
    generation_size=32768,
    metric=[expr_gold_metric],
)
math_500 = LightevalTaskConfig(
name="math_500",
suite=["custom"],
Expand Down Expand Up @@ -141,6 +155,7 @@ def gpqa_prompt_fn(line, task_name: str = None):
# Table of the task configs defined above, in registration order
TASKS_TABLE = [aime24, aime25, math_500, gpqa_diamond]

Expand Down
1 change: 1 addition & 0 deletions src/open_r1/utils/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def register_lighteval_task(

# Register each benchmark in LIGHTEVAL_TASKS under the "custom" suite.
# NOTE(review): the signature of register_lighteval_task is not visible in this
# hunk. The third and fourth arguments look like (short key, lighteval task
# name) -- note "gpqa" vs "gpqa:diamond" -- and the trailing 0 is presumably
# the few-shot count; confirm against the function definition.
register_lighteval_task(LIGHTEVAL_TASKS, "custom", "math_500", "math_500", 0)
register_lighteval_task(LIGHTEVAL_TASKS, "custom", "aime24", "aime24", 0)
register_lighteval_task(LIGHTEVAL_TASKS, "custom", "aime25", "aime25", 0)
register_lighteval_task(LIGHTEVAL_TASKS, "custom", "gpqa", "gpqa:diamond", 0)


Expand Down

0 comments on commit 1798f0e

Please sign in to comment.