Skip to content

Commit e45ed6e

Browse files
fix: resolve duplicate task names and add safeguards. (#3394)
* fix: resolve duplicate task names and add safeguards.

  This commit fixes 144 duplicate task names across MMLU-Redux and Flores translation benchmarks, and adds warnings and safeguards for more deterministic behavior.

  Changes:
  - Renamed all MMLU-Redux tasks from `mmlu_*_generative` to `mmlu_redux_*_generative` (57 tasks) to avoid conflicts with original MMLU tasks that use different datasets (cais/mmlu vs edinburgh-dawg/mmlu-redux-2.0).
  - Fixed Flores translation task duplicates by prefixing with benchmark name (e.g., `flores_ca-pt` → `catalan_bench_flores_ca-pt`). Updated generation scripts and regenerated YAMLs for catalan_bench, portuguese_bench, basque_bench, spanish_bench, and galician_bench.
  - Added duplicate detection in TaskManager._get_task_and_group() that warns users when duplicate task names are found, showing both file paths for easier debugging.
  - Made directory walk deterministic by sorting dirs and file_list in os.walk() to ensure consistent task loading order across different filesystems and operating systems.

  The duplicate MMLU-Redux tasks were particularly problematic as they used different datasets but identical names, causing silent conflicts where users would unknowingly run the wrong benchmark variant.

* Fix tags and task names in group info.
1 parent b529c19 commit e45ed6e

File tree

216 files changed

+292
-551
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

216 files changed

+292
-551
lines changed

lm_eval/tasks/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
490490
tasks_and_groups = collections.defaultdict()
491491
for root, dirs, file_list in os.walk(task_dir):
492492
dirs[:] = [d for d in dirs if d not in ignore_dirs]
493+
dirs.sort() # Sort directories for deterministic traversal order
494+
file_list.sort() # Sort files for consistent processing order
493495
for f in file_list:
494496
if f.endswith(".yaml"):
495497
yaml_path = os.path.join(root, f)
@@ -528,6 +530,13 @@ def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
528530
elif self._config_is_task(config):
529531
# This is a task config
530532
task = config["task"]
533+
if task in tasks_and_groups:
534+
eval_logger.warning(
535+
f"Duplicate task name '{task}' found. "
536+
f"Already registered from: {tasks_and_groups[task]['yaml_path']}. "
537+
f"Skipping duplicate from: {yaml_path}"
538+
)
539+
continue
531540
tasks_and_groups[task] = {
532541
"type": "task",
533542
"yaml_path": yaml_path,

lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
294294
# "group": [f"{BENCH_NAME}_bench", f"{BENCH_NAME}_bench_flores"],
295295
# "group": "flores_eu",
296296
"include": "_flores_common_yaml",
297-
"task": f"flores_{lang_pair_name}",
297+
"task": f"basque_bench_flores_{lang_pair_name}",
298298
"doc_to_text": doc_to_text(src, tgt),
299299
"doc_to_target": doc_to_target(tgt),
300300
},

lm_eval/tasks/basque_bench/flores_eu/flores_ca-eu.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_ca-eu
3+
task: basque_bench_flores_ca-eu
44
doc_to_text: 'Catalan sentence: {{sentence_cat_Latn}}
55
66
Basque sentence:'

lm_eval/tasks/basque_bench/flores_eu/flores_de-eu.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_de-eu
3+
task: basque_bench_flores_de-eu
44
doc_to_text: 'German sentence: {{sentence_deu_Latn}}
55
66
Basque sentence:'

lm_eval/tasks/basque_bench/flores_eu/flores_en-eu.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_en-eu
3+
task: basque_bench_flores_en-eu
44
doc_to_text: 'English sentence: {{sentence_eng_Latn}}
55
66
Basque sentence:'

lm_eval/tasks/basque_bench/flores_eu/flores_es-eu.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_es-eu
3+
task: basque_bench_flores_es-eu
44
doc_to_text: 'Spanish sentence: {{sentence_spa_Latn}}
55
66
Basque sentence:'

lm_eval/tasks/basque_bench/flores_eu/flores_eu-ca.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_eu-ca
3+
task: basque_bench_flores_eu-ca
44
doc_to_text: 'Basque sentence: {{sentence_eus_Latn}}
55
66
Catalan sentence:'

lm_eval/tasks/basque_bench/flores_eu/flores_eu-de.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_eu-de
3+
task: basque_bench_flores_eu-de
44
doc_to_text: 'Basque sentence: {{sentence_eus_Latn}}
55
66
German sentence:'

lm_eval/tasks/basque_bench/flores_eu/flores_eu-en.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_eu-en
3+
task: basque_bench_flores_eu-en
44
doc_to_text: 'Basque sentence: {{sentence_eus_Latn}}
55
66
English sentence:'

lm_eval/tasks/basque_bench/flores_eu/flores_eu-es.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated by `create-yamls.py`
22
include: _flores_common_yaml
3-
task: flores_eu-es
3+
task: basque_bench_flores_eu-es
44
doc_to_text: 'Basque sentence: {{sentence_eus_Latn}}
55
66
Spanish sentence:'

0 commit comments

Comments
 (0)