Skip to content

Commit 89a2007

Browse files
committed
fix(tasks): remove suites in listing tasks
1 parent b1dd57d commit 89a2007

File tree

1 file changed

+17
-94
lines changed

1 file changed

+17
-94
lines changed

src/lighteval/tasks/registry.py

Lines changed: 17 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -80,34 +80,12 @@ def load_community_tasks():
8080

8181
logger = logging.getLogger(__name__)
8282

83-
# Helm, Bigbench, Harness are implementations following an evaluation suite setup
8483
# Original follows the original implementation as closely as possible
8584
# Leaderboard are the evaluations we fixed on the open llm leaderboard - you should get similar results
8685
# Community are for community added evaluations
8786
# Extended are for evaluations with custom logic
8887
# Custom is for all the experiments you might want to do!
8988

90-
# Core suites - always available without extra dependencies
91-
CORE_SUITES = [
92-
"helm",
93-
"bigbench",
94-
"harness",
95-
"leaderboard",
96-
"lighteval",
97-
"original",
98-
"extended",
99-
"custom",
100-
"test",
101-
]
102-
103-
# Optional suites - may require extra dependencies
104-
OPTIONAL_SUITES = [
105-
"community",
106-
"multilingual",
107-
]
108-
109-
DEFAULT_SUITES = CORE_SUITES + OPTIONAL_SUITES
110-
11189

11290
class Registry:
11391
"""The Registry class is used to manage the task registry and get task classes."""
@@ -138,7 +116,6 @@ def __init__(
138116
TASKS_TABLE = [
139117
LightevalTaskConfig(
140118
name="custom_task",
141-
suite="custom",
142119
...
143120
)
144121
]
@@ -187,7 +164,7 @@ def _update_task_configs(self) -> dict[str, LightevalTaskConfig]: # noqa: C901
187164
Now expects task specs in the form:
188165
- task|few_shot
189166
- task (defaults to few_shot=0)
190-
Backwards-compat for suite|task|few_shot is preserved but the suite is ignored.
167+
Backwards-compat for task|few_shot is preserved.
191168
"""
192169
task_to_configs = collections.defaultdict(list)
193170

@@ -259,7 +236,7 @@ def load_tasks(self) -> dict[str, LightevalTask]:
259236
@lru_cache
260237
def _task_superset_dict(self):
261238
"""Returns:
262-
dict[str, list[str]]: A dictionary where keys are task super set names (suite|task) and values are lists of task subset names (suite|task).
239+
dict[str, list[str]]: A dictionary where keys are task superset names (task) and values are lists of task subset names (task).
263240
264241
Example:
265242
{
@@ -276,11 +253,11 @@ def _expand_task_definition(self, task_definition: str):
276253
"""
277254
Args:
278255
task_definition (str): Task definition to expand. In format:
279-
- suite|task
280-
- suite|task_superset (e.g lighteval|mmlu, which runs all the mmlu subtasks)
256+
- task
257+
- task_superset (e.g. mmlu, which runs all the mmlu subtasks)
281258
282259
Returns:
283-
list[str]: List of task names (suite|task)
260+
list[str]: List of task names (task)
284261
"""
285262
# Try if it's a task superset
286263
tasks = self._task_superset_dict.get(task_definition, None)
@@ -379,80 +356,26 @@ def load_all_task_configs(
379356
logger.info(f"Loaded {len(loaded_configs)} task configs in {time_end - time_start:.1f} seconds")
380357
return loaded_configs
381358

382-
def print_all_tasks(self, suites: str | None = None):
383-
"""Print all the tasks in the task registry.
384-
385-
Args:
386-
suites: Comma-separated list of suites to display. If None, shows core suites only.
387-
Use 'all' to show all available suites (core + optional).
388-
Special handling for 'multilingual' suite with dependency checking.
389-
"""
390-
# Parse requested suites
391-
if suites is None:
392-
requested_suites = CORE_SUITES.copy()
393-
else:
394-
requested_suites = [s.strip() for s in suites.split(",")]
395-
396-
# Handle 'all' special case
397-
if "all" in requested_suites:
398-
requested_suites = DEFAULT_SUITES.copy()
359+
def print_all_tasks(self):
360+
"""Print all the tasks in the task registry."""
399361

400-
# Check for multilingual dependencies if requested
401-
if "multilingual" in requested_suites:
402-
import importlib.util
362+
# Get all tasks
363+
all_tasks = sorted(list(self._task_registry.keys()))
403364

404-
if importlib.util.find_spec("langcodes") is None:
405-
logger.warning(
406-
"Multilingual tasks require additional dependencies (langcodes). "
407-
"Install them with: pip install langcodes"
408-
)
409-
requested_suites.remove("multilingual")
410-
411-
# Get all tasks and filter by requested suites
412-
all_tasks = list(self._task_registry.keys())
413-
non_suite_tasks, tasks_on_suite = self._group_tasks(all_tasks, requested_suites)
414-
415-
print(f"Displaying tasks for suites: {', '.join(requested_suites)}")
365+
print(f"Displaying tasks:")
416366
print("=" * 60)
417367

418-
for suite, g in groupby(tasks_on_suite, lambda x: x.split("|")[0]):
419-
tasks_in_suite = [name for name in g if name.split("|")[1]] # Filter out dummy tasks
420-
tasks_in_suite.sort()
421-
422-
print(f"\n* {suite}:")
423-
if not tasks_in_suite:
424-
print(" (no tasks in this suite)")
425-
else:
426-
for task_name in tasks_in_suite:
427-
print(f" - {task_name}")
428-
429-
print("\n* Non suite tasks:")
430-
for task_name in non_suite_tasks:
368+
last_task = None
369+
for task_name in all_tasks:
370+
task_parts = task_name.split(":")
371+
if last_task != task_parts[0]:
372+
print("")
373+
last_task = task_parts[0]
431374
print(f" - {task_name}")
432375
# Print summary
433-
total_tasks = len(tasks_on_suite) + len(non_suite_tasks)
376+
total_tasks = len(all_tasks)
434377
print(f"\nTotal tasks displayed: {total_tasks}")
435378

436-
def _group_tasks(self, all_tasks: list[str], requested_suites: list[str]) -> tuple[list[str], list[str]]:
437-
non_suite_tasks = []
438-
tasks_on_suite = []
439-
for task in all_tasks:
440-
if task.split("|")[0] in requested_suites:
441-
tasks_on_suite.append(task)
442-
else:
443-
non_suite_tasks.append(task)
444-
445-
# Ensure all requested suites are present (even if empty)
446-
suites_in_registry = {name.split("|")[0] for name in tasks_on_suite}
447-
for suite in requested_suites:
448-
if suite not in suites_in_registry:
449-
# We add a dummy task to make sure the suite is printed
450-
tasks_on_suite.append(f"{suite}|")
451-
452-
tasks_on_suite.sort()
453-
454-
return non_suite_tasks, tasks_on_suite
455-
456379
def get_tasks_dump(self) -> list[dict]: # noqa: C901
457380
"""Get all task names, metadata, and docstrings as a Python object.
458381

0 commit comments

Comments
 (0)