Commit
Merge branch 'develop' of https://github.com/ls1intum/Athena into docs/athena
FelixTJDietrich committed Nov 12, 2023
2 parents c9438b1 + 89e7047 commit 08bb529
Showing 46 changed files with 2,080 additions and 1,012 deletions.
@@ -27,6 +27,7 @@ class HealthResponse(BaseModel):
"""
Response indicating whether the Assessment Module Manager is healthy,
and whether all the modules are healthy (i.e. reachable).
Additional information about the modules is also provided.
"""
status: str = Field(const=True, default="ok", example="ok")
modules: dict = Field(
@@ -35,7 +36,8 @@ class HealthResponse(BaseModel):
"module_example": {
"url": "http://localhost:5001",
"type": "programming",
"healthy": True
"healthy": True,
"supportsEvaluation": True
}
}
]
@@ -56,6 +58,7 @@ async def get_health() -> HealthResponse:
"url": module.url,
"type": module.type,
"healthy": await is_healthy(module),
"supportsEvaluation": module.supports_evaluation
}
for module in get_modules()
}
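For orientation, a minimal sketch of what a client now receives from this endpoint, including the new ``supportsEvaluation`` flag; the manager's base URL below is an assumption, not part of this commit:

.. code-block:: python

    # Minimal sketch; the base URL is an assumption, not part of this commit.
    import requests

    MANAGER_URL = "http://localhost:5100"  # hypothetical Assessment Module Manager address

    data = requests.get(f"{MANAGER_URL}/health").json()
    # Expected shape after this change (values are illustrative):
    # {
    #     "status": "ok",
    #     "modules": {
    #         "module_example": {
    #             "url": "http://localhost:5001",
    #             "type": "programming",
    #             "healthy": True,
    #             "supportsEvaluation": True
    #         }
    #     }
    # }
    for name, info in data["modules"].items():
        print(f"{name}: healthy={info['healthy']}, supportsEvaluation={info['supportsEvaluation']}")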
@@ -1,4 +1,6 @@
import configparser
import os

from typing import List, cast
from pathlib import Path

@@ -16,8 +18,9 @@ def list_modules() -> List[Module]:
return [
Module(
name=module,
url=cast(AnyHttpUrl, modules_config[module]["url"]),
url=cast(AnyHttpUrl, os.environ.get(f"{module.upper()}_URL", modules_config[module]["url"])),
type=ExerciseType(modules_config[module]["type"]),
supports_evaluation=modules_config[module].getboolean("supports_evaluation"),
)
for module in modules_config.sections()
]
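As a usage sketch (not part of the diff itself), the lookup above means an environment variable named after the upper-cased module section overrides the ``url`` from ``modules.ini``; the module name and URL below are taken from the config files in this commit:

.. code-block:: python

    # Sketch of the URL override precedence introduced above.
    import os

    module = "module_text_llm"
    ini_url = "http://localhost:5003"  # value from modules.ini for this module

    # If MODULE_TEXT_LLM_URL is set in the environment, it wins; otherwise the ini value is used.
    url = os.environ.get(f"{module.upper()}_URL", ini_url)
    print(url)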
@@ -8,3 +8,4 @@ class Module(BaseModel):
name: str = Field(example="module_example")
url: AnyHttpUrl = Field(example="http://localhost:5001")
type: ExerciseType = Field(example=ExerciseType.text)
supports_evaluation: bool = Field(description="Whether the module supports evaluation", example=True)
7 changes: 6 additions & 1 deletion assessment_module_manager/modules.docker.ini
@@ -1,19 +1,24 @@
[module_example]
url = http://module-example:5001
type = programming
supports_evaluation = true

[module_programming_llm]
url = http://module-programming-llm:5002
type = programming
supports_evaluation = false

[module_text_llm]
url = http://module-text-llm:5003
type = text
supports_evaluation = true

[module_text_cofee]
url = http://module-text-cofee:5004
type = text
supports_evaluation = false

[module_programming_themisml]
url = http://module-programming-themisml:5005
type = programming
supports_evaluation = false
5 changes: 5 additions & 0 deletions assessment_module_manager/modules.ini
@@ -1,19 +1,24 @@
[module_example]
url = http://localhost:5001
type = programming
supports_evaluation = true

[module_programming_llm]
url = http://localhost:5002
type = programming
supports_evaluation = false

[module_text_llm]
url = http://localhost:5003
type = text
supports_evaluation = true

[module_text_cofee]
url = http://localhost:5004
type = text
supports_evaluation = false

[module_programming_themisml]
url = http://localhost:5005
type = programming
supports_evaluation = false
3 changes: 2 additions & 1 deletion athena/athena/__init__.py
@@ -6,7 +6,7 @@
from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction
from .metadata import emit_meta, get_meta
from .experiment import get_experiment_environment
from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider # type: ignore
from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore


@app.get("/")
@@ -28,6 +28,7 @@ def run_module():
"feedback_consumer",
"feedback_provider",
"config_schema_provider",
"evaluation_provider",
"emit_meta",
"get_meta",
"get_experiment_environment",
61 changes: 60 additions & 1 deletion athena/athena/endpoints.py
@@ -358,4 +358,63 @@ def config_schema_provider(cls: Type[C]) -> Type[C]:
async def wrapper():
return cls.schema()

return cls


def evaluation_provider(func: Union[
Callable[[E, S, List[F], List[F]], Any],
Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Any]]
]):
"""
Provide evaluated feedback to the Assessment Module Manager.
Note: The evaluation provider is usually called during the research and development phase (by the Playground).
Return arbitrary evaluation results.
This decorator can be used with several types of functions: synchronous or asynchronous.
Examples:
Below are some examples of possible functions that you can decorate with this decorator:
Without using module config (both synchronous and asynchronous forms):
>>> @evaluation_provider
... def sync_evaluate_feedback(
... exercise: Exercise, submission: Submission,
... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]
... ) -> Any:
... # evaluate predicted feedback here and return evaluation results
>>> @evaluation_provider
... async def async_evaluate_feedback(
... exercise: Exercise, submission: Submission,
... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]
... ) -> Any:
... # evaluate predicted feedback here and return evaluation results
"""
exercise_type = inspect.signature(func).parameters["exercise"].annotation
submission_type = inspect.signature(func).parameters["submission"].annotation
feedback_type = inspect.signature(func).parameters["predicted_feedbacks"].annotation.__args__[0]

@app.post("/evaluation", responses=module_responses)
@authenticated
@with_meta
async def wrapper(
exercise: exercise_type,
submission: submission_type,
true_feedbacks: List[feedback_type],
predicted_feedbacks: List[feedback_type],
):
# Retrieve existing metadata for the exercise, submission and feedback
exercise.meta.update(get_stored_exercise_meta(exercise) or {})
submission.meta.update(get_stored_submission_meta(submission) or {})
for feedback in true_feedbacks + predicted_feedbacks:
feedback.meta.update(get_stored_feedback_meta(feedback) or {})

# Call the actual provider
if inspect.iscoroutinefunction(func):
evaluation = await func(exercise, submission, true_feedbacks, predicted_feedbacks)
else:
evaluation = func(exercise, submission, true_feedbacks, predicted_feedbacks)

return evaluation
return wrapper
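For context, the wrapper above registers a ``POST /evaluation`` route whose body carries the four parameters under their names (FastAPI's behavior for multiple body parameters). Below is a rough, hedged sketch of such a call; the base URL is taken from ``modules.ini``, while the payload fields are abbreviated placeholders (real ``Exercise``/``Submission`` objects have more required fields) and the authentication header expected by ``@authenticated`` is omitted:

.. code-block:: python

    # Rough sketch only: payloads are abbreviated placeholders and the
    # authentication header expected by @authenticated is omitted.
    import requests

    MODULE_URL = "http://localhost:5001"  # module_example from modules.ini

    payload = {
        "exercise": {"id": 1},        # abbreviated; a real Exercise has more required fields
        "submission": {"id": 1},      # abbreviated; a real Submission has more required fields
        "true_feedbacks": [],         # historical feedback
        "predicted_feedbacks": [],    # feedback suggestions to evaluate
    }

    response = requests.post(f"{MODULE_URL}/evaluation", json=payload)
    print(response.status_code, response.json())  # arbitrary evaluation results on success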
Binary file added docs/images/load-anonymized-database-dump.png
33 changes: 32 additions & 1 deletion docs/module/structure.rst
@@ -94,7 +94,7 @@ Example:
)
]
Provide Config Schema
Provide Config Schema (Optional)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get a schema for config options of the module as json schema. The config complying to the schema can then be provided in the header of a request `X-Module-Config` to override the default values. The module can decorate one pydantic model with ``@config_schema_provider`` to provide the schema and should have default values set for all fields as default configuration. The configuration class can be appended to the function signature of all other decorators to provide the configuration to the function.

@@ -108,6 +108,37 @@ Example:
debug: bool = Field(False, description="Whether the module is in debug mode.")
...
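To make the mechanism above more concrete, here is a hedged sketch of a configuration model together with a provider that consumes it; the extra field and the ``module_config`` parameter name are illustrative assumptions, not prescribed by this commit, and the wildcard import follows the documentation's existing examples:

.. code-block:: python

    from typing import List
    from pydantic import BaseModel, Field
    from athena import *  # follows the documentation's existing examples

    @config_schema_provider
    class Configuration(BaseModel):
        debug: bool = Field(False, description="Whether the module is in debug mode.")
        max_suggestions: int = Field(3, description="Illustrative extra option with a default.")

    @feedback_provider
    def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]:
        # Values sent via the X-Module-Config header (or the defaults above) arrive here.
        if module_config.debug:
            print("Running in debug mode")
        return []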
Provide Evaluation (Optional)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get an arbitrary evaluation for a submission with historical ``true_feedbacks`` and feedback suggestions ``predicted_feedbacks``. The Playground usually calls this when conducting an evaluation during an experiment. The module receives the request at the function annotated with ``@evaluation_provider``.

If you want to have the ``/evaluation`` endpoint available during the Playground evaluation mode, you need to set ``supports_evaluation = true`` in the ``modules.ini`` and ``modules.docker.ini`` files.

Example:
.. code-block:: python
from athena import *
@evaluation_provider
def evaluate_feedback(exercise: Exercise, submission: Submission, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]) -> Any:
# Do something with the true and predicted feedback and return the evaluation result
...
# Example: Generate some example evaluation result
evaluation_results = []
true_feedback_embeddings = [random.random() for _ in true_feedbacks]
predicted_feedback_embeddings = [random.random() for _ in predicted_feedbacks]
for feedback, embedding in zip(predicted_feedbacks, predicted_feedback_embeddings):
feedback_evaluation = {
"feedback_id": feedback.id,
"embedding": embedding,
"has_match": len([t for t in true_feedback_embeddings if abs(t - embedding) < 0.1]) > 0,
"correctness": random.random()
}
evaluation_results.append(feedback_evaluation)
...
# Return arbitrary evaluation results
return evaluation_results
Environment Variables
---------------------
You should provide at least the following environment variables for your module to work properly:
138 changes: 107 additions & 31 deletions docs/setup/evaluation.rst
@@ -1,54 +1,130 @@
Evaluation Data
Evaluation Data for Athena Playground
===========================================

The Playground comes bundled with a basic set of example data to test Athena's functionalities. For more comprehensive evaluation, you can load your own data or use anonymized data from `Artemis <https://github.com/ls1intum/Artemis>`_, an open-source LMS.
The Athena Playground is equipped with a set of example data for initial testing. To conduct a more thorough evaluation, users have the option to use their own datasets or request anonymized data from `Artemis <https://github.com/ls1intum/Artemis>`_, an open-source LMS.

Example Data
-------------------------------------------
This data is provided within the `playground/data/example` directory and is automatically utilized when launching the Playground.

Evaluation Data
-------------------------------------------
The `playground/data/evaluation` directory is designated for your custom data used for evaluation purposes. Initially, it's left empty for you to populate.
Example Data
------------

Artemis Evaluation Data
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you're integrating with Artemis LMS and would like to evaluate their data, you can request an anonymized database dump from the Artemis team. This request requires a valid reason and a signed data protection agreement (NDA). For further details, please get in touch with the Artemis team.
Located in ``playground/data/example``, this default dataset is automatically used when the Playground is initiated.

Once the database dump is acquired, follow these steps to export the data to the Playground:

1. **Load the Database Dump:**
Evaluation Data
---------------

.. code-block:: bash
The directory ``playground/data/evaluation`` is reserved for your custom data. It is initially empty, ready to be filled with your evaluation datasets.

npm run export:artemis:1-load-anonymized-database-dump

This command loads the data into your local MySQL database. You can use the same database as Artemis.
Exporting Evaluation Data from Artemis
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

2. **Export the Data:**
To evaluate using data from Artemis, you can request an anonymized database dump, contingent on a valid justification and a signed data protection agreement. Contact the Artemis team for details.

.. code-block:: bash
Steps to Export Evaluation Data from Artemis:
"""""""""""""""""""""""""""""""""""""""""""""

npm run export:artemis:2-export-evaluation-data
1. **Set up a MySQL database:**
Create a new MySQL database and user. You can use the same database instance as Artemis or a separate one; follow the instructions in the `Artemis documentation <https://docs.artemis.cit.tum.de/dev/setup/database.html#mysql-setup>`_ to set it up.

This exports exercises listed under `playground/scripts/artemis/evaluation_data` to the `playground/data/evaluation` directory, where you can use it for evaluation purposes.

Artemis Programming Exercises
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Artemis programming exercises are not included in the anonymized database dump. To access these exercises, you'll need to request them separately from the Artemis team. Once you have the programming exercises, an instructor from the course can export them using the following commands:
2. **Load the Database Dump:**
Use the command below to import the anonymized data into your local MySQL database. You will only need to do this once to populate the database. The script will ask you for the database ``host``, ``port``, ``user``, ``password``, and ``database``. Additionally, you will need to provide the path to the anonymized database dump, e.g. ``/home/user/artemis-database-dump.sql``.

1. **Download the Repositories:**
.. code-block:: bash
.. code-block:: bash
npm run export:artemis:1-load-anonymized-database-dump
npm run export:artemis:3-download-programming-repositories
.. image:: ../images/load-anonymized-database-dump.png
:width: 500px
:alt: Example terminal screenshot of the command to load the anonymized database dump
:align: center

This command exports the programming exercises' materials and submissions to the `playground/data/evaluation` directory. The instructor should then zip these and send them to you.
3. **Export the Data:**
This command exports the data specified in ``playground/scripts/artemis/evaluation_data/text_exercises.json`` to your local ``playground/data/evaluation`` directory.

2. **Link the Repositories:**
.. code-block:: bash
.. code-block:: bash
npm run export:artemis:2-export-evaluation-data
npm run export:artemis:4-link-programming-repositories
This command links the repositories to the `exercise-*.json` files and validates if there are any missing repositories.
Artemis Programming Exercises
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Programming exercises are not part of the anonymized database dump and must be requested separately from the Artemis team. You can find the selected exercises and their participation IDs for export in ``playground/scripts/artemis/evaluation_data/programming_exercises.json``.

Steps for Instructors to Export Programming Exercises:
""""""""""""""""""""""""""""""""""""""""""""""""""""""

4. **Download Repositories:**
Instructors can download materials and submissions from Artemis using the command below, then zip and transfer them to you. Keep in mind that this command will take a long time to run if there are many participations to download.

.. code-block:: bash
npm run export:artemis:3-download-programming-repositories
5. **Link the Repositories:**
Put the downloaded repositories in the ``playground/data/evaluation`` directory and link them to the respective exercises using the following command. This command will also validate if there are any missing repositories. Without this step, the programming repositories will not be available in the Playground.

.. code-block:: bash
npm run export:artemis:4-link-programming-repositories
Generating ``programming_exercises.json``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The SQL script provided below can be adapted to generate a ``programming_exercises.json`` file, located at ``playground/scripts/artemis/evaluation_data/programming_exercises.json``. Similar logic applies to create ``text_exercises.json``. The script collects data on selected exercises, aggregates participation data, and formats it into a JSON structure suitable for export scripts.

**Note:** The provided SQL script is an example and should be tailored to include the specific IDs of the programming exercises you wish to export. You might want to reduce the number of participations to export if you don't need all of them. ``anonymized_artemis`` should be replaced with the name of your database.

.. code-block:: sql
WITH temp_course_exercises AS (
SELECT
DISTINCT e.id,
c.id AS course_id,
0 as is_exam_exercise -- Course exercises
FROM
anonymized_artemis.exercise e
JOIN anonymized_artemis.course c ON e.course_id = c.id
),
temp_exam_exercises AS (
SELECT
DISTINCT e.id,
c.id AS course_id,
1 as is_exam_exercise -- Exam exercises
FROM
anonymized_artemis.course c
JOIN anonymized_artemis.exam ex ON ex.course_id = c.id
JOIN anonymized_artemis.exercise_group eg ON eg.exam_id = ex.id
JOIN anonymized_artemis.exercise e ON e.exercise_group_id = eg.id
),
temp_exercises AS (
SELECT * FROM temp_course_exercises
UNION
SELECT * FROM temp_exam_exercises
)
SELECT JSON_OBJECT(
c.title, JSON_OBJECT(
'course_id', c.id,
'semester', c.semester,
'exercises', JSON_ARRAYAGG(
JSON_OBJECT(
'id', e.id,
'title', e.title,
'is_exam_exercise', te.is_exam_exercise
)
),
'participations', JSON_ARRAYAGG(
(SELECT JSON_ARRAYAGG(p.id)
FROM anonymized_artemis.participation p -- Note: This may also contain participations that are unnecessary
WHERE p.exercise_id = e.id)
)
)
)
FROM temp_exercises te
JOIN anonymized_artemis.exercise e ON te.id = e.id
JOIN anonymized_artemis.course c ON c.id = te.course_id
WHERE e.id IN (2610, 3782, 2111, 2104, 3187, 3781, 6344, 6433, 3942, 3693, 4864, 4896, 3913, 3914, 3908, 3185, 3184) -- Programming exercises to export
GROUP BY c.id, c.title, c.semester;
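For orientation, the query above emits one JSON object per course; a hypothetical, abbreviated illustration of the resulting ``programming_exercises.json`` shape follows (only the key names come from the ``JSON_OBJECT``/``JSON_ARRAYAGG`` calls above; the course title, semester, and all IDs except exercise 2610, which appears in the WHERE clause, are made up):

.. code-block:: python

    # Hypothetical, abbreviated illustration of the aggregated shape.
    {
        "Example Course": {
            "course_id": 1,
            "semester": "WS22/23",
            "exercises": [
                {"id": 2610, "title": "Example Exercise", "is_exam_exercise": 0}
            ],
            "participations": [
                [101, 102, 103]  # participation IDs for exercise 2610
            ]
        }
    }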
