Skip to content

Commit

Permalink
fixed mmlu generative response extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
RawthiL committed Nov 18, 2024
1 parent 62b4364 commit 1b7bae7
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 9 deletions.
12 changes: 12 additions & 0 deletions lm_eval/tasks/mmlu/generative/_default_template_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@ metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
filter_list:
- name: get_response
filter:
# Keep only the text before the first line break
- function: "regex"
regex_pattern: "^(.*?)(?=\\n|$)"
# Remove leading whitespace
- function: remove_whitespace
# Strip trailing whitespace and line breaks
- function: "regex"
regex_pattern: "^(.*?)\\s*$"
- function: take_first
metadata:
version: 2.0
dataset_kwargs:
Expand Down
23 changes: 14 additions & 9 deletions lm_eval/tasks/mmlu/generative/_mmlu.yaml
Original file line number Diff line number Diff line change
@@ -1,33 +1,38 @@
group: mmlu_generative
group: mmlu_fix_generative
group_alias: mmlu (generative)
task:
- group: stem
task:
- mmlu_stem_generative
- mmlu_stem_fix_generative
aggregate_metric_list:
- metric: acc
- metric: exact_match
weight_by_size: True
filter_list: get_response
- group: other
task:
- mmlu_other_generative
- mmlu_other_fix_generative
aggregate_metric_list:
- metric: acc
- metric: exact_match
weight_by_size: True
filter_list: get_response
- group: social sciences
task:
- mmlu_social_sciences_generative
- mmlu_social_sciences_fix_generative
aggregate_metric_list:
- metric: acc
- metric: exact_match
weight_by_size: True
filter_list: get_response
- group: humanities
task:
- mmlu_humanities_generative
- mmlu_humanities_fix_generative
aggregate_metric_list:
- metric: acc
- metric: exact_match
weight_by_size: True
filter_list: get_response
aggregate_metric_list:
- aggregation: mean
metric: exact_match
weight_by_size: True
filter_list: get_response
metadata:
version: 2

0 comments on commit 1b7bae7

Please sign in to comment.