Skip to content

Commit

Permalink
Fix len reward (#385)
Browse files Browse the repository at this point in the history
* Rename solutions to solution for `len_reward`

* Fix docstring for len_reward

* Update src/open_r1/rewards.py

Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>

---------

Co-authored-by: Kashif Rasul <kashif.rasul@gmail.com>
Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>
  • Loading branch information
3 people authored Feb 20, 2025
1 parent d76ecc1 commit 45a32ee
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/open_r1/rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ def reasoning_steps_reward(completions, **kwargs):
return [min(1.0, count / 3) for count in matches]


def len_reward(completions: list[Dict[str, str]], solutions: list[str], **kwargs) -> float:
def len_reward(completions: list[Dict[str, str]], solution: list[str], **kwargs) -> float:
"""Compute length-based rewards to discourage overthinking and promote token efficiency.
Taken from the Kimi 1.5 tech report: https://arxiv.org/abs/2501.12599
Args:
completions: List of model completions
solutions: List of ground truth solutions
solution: List of ground truth solutions
Returns:
List of rewards where:
Expand All @@ -103,7 +103,7 @@ def len_reward(completions: list[Dict[str, str]], solutions: list[str], **kwargs

# First check correctness of answers
correctness = []
for content, sol in zip(contents, solutions):
for content, sol in zip(contents, solution):
gold_parsed = parse(
sol,
extraction_mode="first_match",
Expand Down

0 comments on commit 45a32ee

Please sign in to comment.