fix(tasks): remove suites in listing tasks

bram-pramono · bram-pramono · commit 89a2007ebf95 · 2025-12-16T16:15:25.000+01:00
diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
@@ -80,34 +80,12 @@ def load_community_tasks():
 
 logger = logging.getLogger(__name__)
 
-# Helm, Bigbench, Harness are implementations following an evaluation suite setup
 # Original follows the original implementation as closely as possible
 # Leaderboard are the evaluations we fixed on the open llm leaderboard - you should get similar results
 # Community are for community added evaluations
 # Extended are for evaluations with custom logic
 # Custom is for all the experiments you might want to do!
 
-# Core suites - always available without extra dependencies
-CORE_SUITES = [
-    "helm",
-    "bigbench",
-    "harness",
-    "leaderboard",
-    "lighteval",
-    "original",
-    "extended",
-    "custom",
-    "test",
-]
-
-# Optional suites - may require extra dependencies
-OPTIONAL_SUITES = [
-    "community",
-    "multilingual",
-]
-
-DEFAULT_SUITES = CORE_SUITES + OPTIONAL_SUITES
-
 
 class Registry:
     """The Registry class is used to manage the task registry and get task classes."""
@@ -138,7 +116,6 @@ def __init__(
                     TASKS_TABLE = [
                         LightevalTaskConfig(
                             name="custom_task",
-                            suite="custom",
                             ...
                         )
                     ]
@@ -187,7 +164,7 @@ def _update_task_configs(self) -> dict[str, LightevalTaskConfig]:  # noqa: C901
         Now expects task specs in the form:
         - task|few_shot
         - task (defaults to few_shot=0)
-        Backwards-compat for suite|task|few_shot is preserved but the suite is ignored.
+        Backwards-compat for task|few_shot is preserved.
         """
         task_to_configs = collections.defaultdict(list)
 
@@ -259,7 +236,7 @@ def load_tasks(self) -> dict[str, LightevalTask]:
     @lru_cache
     def _task_superset_dict(self):
         """Returns:
-            dict[str, list[str]]: A dictionary where keys are task super set names (suite|task) and values are lists of task subset names (suite|task).
+            dict[str, list[str]]: A dictionary where keys are task superset names and values are lists of task subset names.
 
         Example:
             {
@@ -276,11 +253,11 @@ def _expand_task_definition(self, task_definition: str):
         """
         Args:
             task_definition (str): Task definition to expand. In format:
-                - suite|task
-                - suite|task_superset (e.g lighteval|mmlu, which runs all the mmlu subtasks)
+                - task
+                - task_superset (e.g. mmlu, which runs all the mmlu subtasks)
 
         Returns:
-            list[str]: List of task names (suite|task)
+            list[str]: List of task names
         """
         # Try if it's a task superset
         tasks = self._task_superset_dict.get(task_definition, None)
@@ -379,80 +356,26 @@ def load_all_task_configs(
         logger.info(f"Loaded {len(loaded_configs)} task configs in {time_end - time_start:.1f} seconds")
         return loaded_configs
 
-    def print_all_tasks(self, suites: str | None = None):
-        """Print all the tasks in the task registry.
-
-        Args:
-            suites: Comma-separated list of suites to display. If None, shows core suites only.
-                   Use 'all' to show all available suites (core + optional).
-                   Special handling for 'multilingual' suite with dependency checking.
-        """
-        # Parse requested suites
-        if suites is None:
-            requested_suites = CORE_SUITES.copy()
-        else:
-            requested_suites = [s.strip() for s in suites.split(",")]
-
-            # Handle 'all' special case
-            if "all" in requested_suites:
-                requested_suites = DEFAULT_SUITES.copy()
+    def print_all_tasks(self):
+        """Print all registered task names, sorted, with a blank line between groups sharing a ':' prefix."""
 
-            # Check for multilingual dependencies if requested
-            if "multilingual" in requested_suites:
-                import importlib.util
+        # Sort the names so that tasks sharing the same prefix before ':' end up adjacent
+        all_tasks = sorted(self._task_registry.keys())
 
-                if importlib.util.find_spec("langcodes") is None:
-                    logger.warning(
-                        "Multilingual tasks require additional dependencies (langcodes). "
-                        "Install them with: pip install langcodes"
-                    )
-                    requested_suites.remove("multilingual")
-
-        # Get all tasks and filter by requested suites
-        all_tasks = list(self._task_registry.keys())
-        non_suite_tasks, tasks_on_suite = self._group_tasks(all_tasks, requested_suites)
-
-        print(f"Displaying tasks for suites: {', '.join(requested_suites)}")
+        print("Displaying tasks:")
         print("=" * 60)
 
-        for suite, g in groupby(tasks_on_suite, lambda x: x.split("|")[0]):
-            tasks_in_suite = [name for name in g if name.split("|")[1]]  # Filter out dummy tasks
-            tasks_in_suite.sort()
-
-            print(f"\n* {suite}:")
-            if not tasks_in_suite:
-                print("  (no tasks in this suite)")
-            else:
-                for task_name in tasks_in_suite:
-                    print(f"  - {task_name}")
-
-        print("\n* Non suite tasks:")
-        for task_name in non_suite_tasks:
+        last_task = None  # prefix (text before the first ':') of the previously printed task
+        for task_name in all_tasks:
+            task_parts = task_name.split(":")
+            if last_task != task_parts[0]:
+                print("")  # blank line separates one prefix group from the next
+                last_task = task_parts[0]
             print(f"  - {task_name}")
         # Print summary
-        total_tasks = len(tasks_on_suite) + len(non_suite_tasks)
+        total_tasks = len(all_tasks)
         print(f"\nTotal tasks displayed: {total_tasks}")
 
-    def _group_tasks(self, all_tasks: list[str], requested_suites: list[str]) -> tuple[list[str], list[str]]:
-        non_suite_tasks = []
-        tasks_on_suite = []
-        for task in all_tasks:
-            if task.split("|")[0] in requested_suites:
-                tasks_on_suite.append(task)
-            else:
-                non_suite_tasks.append(task)
-
-        # Ensure all requested suites are present (even if empty)
-        suites_in_registry = {name.split("|")[0] for name in tasks_on_suite}
-        for suite in requested_suites:
-            if suite not in suites_in_registry:
-                # We add a dummy task to make sure the suite is printed
-                tasks_on_suite.append(f"{suite}|")
-
-        tasks_on_suite.sort()
-
-        return non_suite_tasks, tasks_on_suite
-
     def get_tasks_dump(self) -> list[dict]:  # noqa: C901
         """Get all task names, metadata, and docstrings as a Python object.