@@ -80,34 +80,12 @@ def load_community_tasks():
8080
8181logger = logging .getLogger (__name__ )
8282
83- # Helm, Bigbench, Harness are implementations following an evaluation suite setup
8483# Original follows the original implementation as closely as possible
8584# Leaderboard is for the evaluations we fixed on the Open LLM Leaderboard - you should get similar results
8685# Community is for community-added evaluations
8786# Extended is for evaluations with custom logic
8887# Custom is for all the experiments you might want to do!
8988
90- # Core suites - always available without extra dependencies
91- CORE_SUITES = [
92- "helm" ,
93- "bigbench" ,
94- "harness" ,
95- "leaderboard" ,
96- "lighteval" ,
97- "original" ,
98- "extended" ,
99- "custom" ,
100- "test" ,
101- ]
102-
103- # Optional suites - may require extra dependencies
104- OPTIONAL_SUITES = [
105- "community" ,
106- "multilingual" ,
107- ]
108-
109- DEFAULT_SUITES = CORE_SUITES + OPTIONAL_SUITES
110-
11189
11290class Registry :
11391 """The Registry class is used to manage the task registry and get task classes."""
@@ -138,7 +116,6 @@ def __init__(
138116 TASKS_TABLE = [
139117 LightevalTaskConfig(
140118 name="custom_task",
141- suite="custom",
142119 ...
143120 )
144121 ]
@@ -187,7 +164,7 @@ def _update_task_configs(self) -> dict[str, LightevalTaskConfig]: # noqa: C901
187164 Now expects task specs in the form:
188165 - task|few_shot
189166 - task (defaults to few_shot=0)
190- Backwards-compat for suite| task|few_shot is preserved but the suite is ignored .
167+ Backwards-compat for task|few_shot is preserved.
191168 """
192169 task_to_configs = collections .defaultdict (list )
193170
@@ -259,7 +236,7 @@ def load_tasks(self) -> dict[str, LightevalTask]:
259236 @lru_cache
260237 def _task_superset_dict (self ):
261238 """Returns:
262- dict[str, list[str]]: A dictionary where keys are task super set names (suite| task) and values are lists of task subset names (suite| task).
239+ dict[str, list[str]]: A dictionary where keys are task superset names (task) and values are lists of task subset names (task).
263240
264241 Example:
265242 {
@@ -276,11 +253,11 @@ def _expand_task_definition(self, task_definition: str):
276253 """
277254 Args:
278255 task_definition (str): Task definition to expand. In format:
279- - suite| task
280- - suite| task_superset (e.g lighteval| mmlu, which runs all the mmlu subtasks)
256+ - task
257+ - task_superset (e.g. mmlu, which runs all the mmlu subtasks)
281258
282259 Returns:
283- list[str]: List of task names (suite| task)
260+ list[str]: List of task names (task)
284261 """
285262 # Try if it's a task superset
286263 tasks = self ._task_superset_dict .get (task_definition , None )
@@ -379,80 +356,26 @@ def load_all_task_configs(
379356 logger .info (f"Loaded { len (loaded_configs )} task configs in { time_end - time_start :.1f} seconds" )
380357 return loaded_configs
381358
382- def print_all_tasks (self , suites : str | None = None ):
383- """Print all the tasks in the task registry.
384-
385- Args:
386- suites: Comma-separated list of suites to display. If None, shows core suites only.
387- Use 'all' to show all available suites (core + optional).
388- Special handling for 'multilingual' suite with dependency checking.
389- """
390- # Parse requested suites
391- if suites is None :
392- requested_suites = CORE_SUITES .copy ()
393- else :
394- requested_suites = [s .strip () for s in suites .split ("," )]
395-
396- # Handle 'all' special case
397- if "all" in requested_suites :
398- requested_suites = DEFAULT_SUITES .copy ()
359+ def print_all_tasks (self ):
360+ """Print all the tasks in the task registry."""
399361
400- # Check for multilingual dependencies if requested
401- if "multilingual" in requested_suites :
402- import importlib .util
362+ # Get all tasks
363+ all_tasks = sorted (list (self ._task_registry .keys ()))
403364
404- if importlib .util .find_spec ("langcodes" ) is None :
405- logger .warning (
406- "Multilingual tasks require additional dependencies (langcodes). "
407- "Install them with: pip install langcodes"
408- )
409- requested_suites .remove ("multilingual" )
410-
411- # Get all tasks and filter by requested suites
412- all_tasks = list (self ._task_registry .keys ())
413- non_suite_tasks , tasks_on_suite = self ._group_tasks (all_tasks , requested_suites )
414-
415- print (f"Displaying tasks for suites: { ', ' .join (requested_suites )} " )
365+ print (f"Displaying tasks:" )
416366 print ("=" * 60 )
417367
418- for suite , g in groupby (tasks_on_suite , lambda x : x .split ("|" )[0 ]):
419- tasks_in_suite = [name for name in g if name .split ("|" )[1 ]] # Filter out dummy tasks
420- tasks_in_suite .sort ()
421-
422- print (f"\n * { suite } :" )
423- if not tasks_in_suite :
424- print (" (no tasks in this suite)" )
425- else :
426- for task_name in tasks_in_suite :
427- print (f" - { task_name } " )
428-
429- print ("\n * Non suite tasks:" )
430- for task_name in non_suite_tasks :
368+ last_task = None
369+ for task_name in all_tasks :
370+ task_parts = task_name .split (":" )
371+ if last_task != task_parts [0 ]:
372+ print ("" )
373+ last_task = task_parts [0 ]
431374 print (f" - { task_name } " )
432375 # Print summary
433- total_tasks = len (tasks_on_suite ) + len ( non_suite_tasks )
376+ total_tasks = len (all_tasks )
434377 print (f"\n Total tasks displayed: { total_tasks } " )
435378
436- def _group_tasks (self , all_tasks : list [str ], requested_suites : list [str ]) -> tuple [list [str ], list [str ]]:
437- non_suite_tasks = []
438- tasks_on_suite = []
439- for task in all_tasks :
440- if task .split ("|" )[0 ] in requested_suites :
441- tasks_on_suite .append (task )
442- else :
443- non_suite_tasks .append (task )
444-
445- # Ensure all requested suites are present (even if empty)
446- suites_in_registry = {name .split ("|" )[0 ] for name in tasks_on_suite }
447- for suite in requested_suites :
448- if suite not in suites_in_registry :
449- # We add a dummy task to make sure the suite is printed
450- tasks_on_suite .append (f"{ suite } |" )
451-
452- tasks_on_suite .sort ()
453-
454- return non_suite_tasks , tasks_on_suite
455-
456379 def get_tasks_dump (self ) -> list [dict ]: # noqa: C901
457380 """Get all task names, metadata, and docstrings as a Python object.
458381
0 commit comments