Commit
Merge branch 'develop' of https://github.com/ls1intum/Athena into docs/athena
FelixTJDietrich committed Nov 12, 2023
2 parents c9438b1 + 89e7047 commit 08bb529
Showing 46 changed files with 2,080 additions and 1,012 deletions.
@@ -27,6 +27,7 @@ class HealthResponse(BaseModel):
"""
Response indicating whether the Assessment Module Manager is healthy,
and whether all the modules are healthy (i.e. reachable).
Additional information about the modules is also provided.
"""
status: str = Field(const=True, default="ok", example="ok")
modules: dict = Field(
@@ -35,7 +36,8 @@ class HealthResponse(BaseModel):
"module_example": {
"url": "http://localhost:5001",
"type": "programming",
"healthy": True
"healthy": True,
"supportsEvaluation": True
}
}
]
@@ -56,6 +58,7 @@ async def get_health() -> HealthResponse:
"url": module.url,
"type": module.type,
"healthy": await is_healthy(module),
"supportsEvaluation": module.supports_evaluation
}
for module in get_modules()
}
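For orientation, a minimal sketch of what a client now receives from this endpoint, including the new ``supportsEvaluation`` flag; the manager's base URL below is an assumption, not part of this commit:

.. code-block:: python

    # Minimal sketch; the base URL is an assumption, not part of this commit.
    import requests

    MANAGER_URL = "http://localhost:5100"  # hypothetical Assessment Module Manager address

    data = requests.get(f"{MANAGER_URL}/health").json()
    # Expected shape after this change (values are illustrative):
    # {
    #     "status": "ok",
    #     "modules": {
    #         "module_example": {
    #             "url": "http://localhost:5001",
    #             "type": "programming",
    #             "healthy": True,
    #             "supportsEvaluation": True
    #         }
    #     }
    # }
    for name, info in data["modules"].items():
        print(f"{name}: healthy={info['healthy']}, supportsEvaluation={info['supportsEvaluation']}")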
@@ -1,4 +1,6 @@
import configparser
import os

from typing import List, cast
from pathlib import Path

@@ -16,8 +18,9 @@ def list_modules() -> List[Module]:
return [
Module(
name=module,
url=cast(AnyHttpUrl, modules_config[module]["url"]),
url=cast(AnyHttpUrl, os.environ.get(f"{module.upper()}_URL", modules_config[module]["url"])),
type=ExerciseType(modules_config[module]["type"]),
supports_evaluation=modules_config[module].getboolean("supports_evaluation"),
)
for module in modules_config.sections()
]
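As a usage sketch (not part of the diff itself), the lookup above means an environment variable named after the upper-cased module section overrides the ``url`` from ``modules.ini``; the module name and URL below are taken from the config files in this commit:

.. code-block:: python

    # Sketch of the URL override precedence introduced above.
    import os

    module = "module_text_llm"
    ini_url = "http://localhost:5003"  # value from modules.ini for this module

    # If MODULE_TEXT_LLM_URL is set in the environment, it wins; otherwise the ini value is used.
    url = os.environ.get(f"{module.upper()}_URL", ini_url)
    print(url)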
@@ -8,3 +8,4 @@ class Module(BaseModel):
name: str = Field(example="module_example")
url: AnyHttpUrl = Field(example="http://localhost:5001")
type: ExerciseType = Field(example=ExerciseType.text)
supports_evaluation: bool = Field(description="Whether the module supports evaluation", example=True)
7 changes: 6 additions & 1 deletion assessment_module_manager/modules.docker.ini
@@ -1,19 +1,24 @@
[module_example]
url = http://module-example:5001
type = programming
supports_evaluation = true

[module_programming_llm]
url = http://module-programming-llm:5002
type = programming
supports_evaluation = false

[module_text_llm]
url = http://module-text-llm:5003
type = text
supports_evaluation = true

[module_text_cofee]
url = http://module-text-cofee:5004
type = text
supports_evaluation = false

[module_programming_themisml]
url = http://module-programming-themisml:5005
type = programming
supports_evaluation = false
5 changes: 5 additions & 0 deletions assessment_module_manager/modules.ini
@@ -1,19 +1,24 @@
[module_example]
url = http://localhost:5001
type = programming
supports_evaluation = true

[module_programming_llm]
url = http://localhost:5002
type = programming
supports_evaluation = false

[module_text_llm]
url = http://localhost:5003
type = text
supports_evaluation = true

[module_text_cofee]
url = http://localhost:5004
type = text
supports_evaluation = false

[module_programming_themisml]
url = http://localhost:5005
type = programming
supports_evaluation = false
3 changes: 2 additions & 1 deletion athena/athena/__init__.py
@@ -6,7 +6,7 @@
from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction
from .metadata import emit_meta, get_meta
from .experiment import get_experiment_environment
from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider # type: ignore
from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore


@app.get("/")
@@ -28,6 +28,7 @@ def run_module():
"feedback_consumer",
"feedback_provider",
"config_schema_provider",
"evaluation_provider",
"emit_meta",
"get_meta",
"get_experiment_environment",
61 changes: 60 additions & 1 deletion athena/athena/endpoints.py
@@ -358,4 +358,63 @@ def config_schema_provider(cls: Type[C]) -> Type[C]:
async def wrapper():
return cls.schema()

return cls


def evaluation_provider(func: Union[
Callable[[E, S, List[F], List[F]], Any],
Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Any]]
]):
"""
Provide evaluated feedback to the Assessment Module Manager.
Note: The evaluation provider is usually called during the research and development phase (by the Playground).
Return arbitrary evaluation results.
This decorator can be used with several types of functions: synchronous or asynchronous.
Examples:
Below are some examples of possible functions that you can decorate with this decorator:
Without using module config (both synchronous and asynchronous forms):
>>> @evaluation_provider
... def sync_evaluate_feedback(
... exercise: Exercise, submission: Submission,
... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]
... ) -> Any:
... # evaluate predicted feedback here and return evaluation results
>>> @evaluation_provider
... async def async_evaluate_feedback(
... exercise: Exercise, submission: Submission,
... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]
... ) -> Any:
... # evaluate predicted feedback here and return evaluation results
"""
exercise_type = inspect.signature(func).parameters["exercise"].annotation
submission_type = inspect.signature(func).parameters["submission"].annotation
feedback_type = inspect.signature(func).parameters["predicted_feedbacks"].annotation.__args__[0]

@app.post("/evaluation", responses=module_responses)
@authenticated
@with_meta
async def wrapper(
exercise: exercise_type,
submission: submission_type,
true_feedbacks: List[feedback_type],
predicted_feedbacks: List[feedback_type],
):
# Retrieve existing metadata for the exercise, submission and feedback
exercise.meta.update(get_stored_exercise_meta(exercise) or {})
submission.meta.update(get_stored_submission_meta(submission) or {})
for feedback in true_feedbacks + predicted_feedbacks:
feedback.meta.update(get_stored_feedback_meta(feedback) or {})

# Call the actual provider
if inspect.iscoroutinefunction(func):
evaluation = await func(exercise, submission, true_feedbacks, predicted_feedbacks)
else:
evaluation = func(exercise, submission, true_feedbacks, predicted_feedbacks)

return evaluation
return wrapper
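For context, the wrapper above registers a ``POST /evaluation`` route whose body carries the four parameters under their names (FastAPI's behavior for multiple body parameters). Below is a rough, hedged sketch of such a call; the base URL is taken from ``modules.ini``, while the payload fields are abbreviated placeholders (real ``Exercise``/``Submission`` objects have more required fields) and the authentication header expected by ``@authenticated`` is omitted:

.. code-block:: python

    # Rough sketch only: payloads are abbreviated placeholders and the
    # authentication header expected by @authenticated is omitted.
    import requests

    MODULE_URL = "http://localhost:5001"  # module_example from modules.ini

    payload = {
        "exercise": {"id": 1},        # abbreviated; a real Exercise has more required fields
        "submission": {"id": 1},      # abbreviated; a real Submission has more required fields
        "true_feedbacks": [],         # historical feedback
        "predicted_feedbacks": [],    # feedback suggestions to evaluate
    }

    response = requests.post(f"{MODULE_URL}/evaluation", json=payload)
    print(response.status_code, response.json())  # arbitrary evaluation results on success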
Binary file added docs/images/load-anonymized-database-dump.png
33 changes: 32 additions & 1 deletion docs/module/structure.rst
@@ -94,7 +94,7 @@ Example:
)
]
Provide Config Schema
Provide Config Schema (Optional)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get a schema for config options of the module as json schema. The config complying to the schema can then be provided in the header of a request `X-Module-Config` to override the default values. The module can decorate one pydantic model with ``@config_schema_provider`` to provide the schema and should have default values set for all fields as default configuration. The configuration class can be appended to the function signature of all other decorators to provide the configuration to the function.

@@ -108,6 +108,37 @@ Example:
debug: bool = Field(False, description="Whether the module is in debug mode.")
...
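To make the mechanism above more concrete, here is a hedged sketch of a configuration model together with a provider that consumes it; the extra field and the ``module_config`` parameter name are illustrative assumptions, not prescribed by this commit, and the wildcard import follows the documentation's existing examples:

.. code-block:: python

    from typing import List
    from pydantic import BaseModel, Field
    from athena import *  # follows the documentation's existing examples

    @config_schema_provider
    class Configuration(BaseModel):
        debug: bool = Field(False, description="Whether the module is in debug mode.")
        max_suggestions: int = Field(3, description="Illustrative extra option with a default.")

    @feedback_provider
    def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]:
        # Values sent via the X-Module-Config header (or the defaults above) arrive here.
        if module_config.debug:
            print("Running in debug mode")
        return []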
Provide Evaluation (Optional)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get an arbitrary evaluation for a submission with historical ``true_feedbacks`` and feedback suggestions ``predicted_feedbacks``. The Playground usually calls this when conducting an evaluation during an experiment. The module receives the request at the function annotated with ``@evaluation_provider``.

If you want to have the ``/evaluation`` endpoint available during the Playground evaluation mode, you need to set ``supports_evaluation = true`` in the ``modules.ini`` and ``modules.docker.ini`` files.

Example:
.. code-block:: python
from athena import *
@evaluation_provider
def evaluate_feedback(exercise: Exercise, submission: Submission, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]) -> Any:
# Do something with the true and predicted feedback and return the evaluation result
...
# Example: Generate some example evaluation result
evaluation_results = []
true_feedback_embeddings = [random.random() for _ in true_feedbacks]
predicted_feedback_embeddings = [random.random() for _ in predicted_feedbacks]
for feedback, embedding in zip(predicted_feedbacks, predicted_feedback_embeddings):
feedback_evaluation = {
"feedback_id": feedback.id,
"embedding": embedding,
"has_match": len([t for t in true_feedback_embeddings if abs(t - embedding) < 0.1]) > 0,
"correctness": random.random()
}
evaluation_results.append(feedback_evaluation)
...
# Return arbitrary evaluation results
return evaluation_results
Environment Variables
---------------------
You should provide at least the following environment variables for your module to work properly:
138 changes: 107 additions & 31 deletions docs/setup/evaluation.rst
@@ -1,54 +1,130 @@
Evaluation Data
Evaluation Data for Athena Playground
===========================================

The Playground comes bundled with a basic set of example data to test Athena's functionalities. For more comprehensive evaluation, you can load your own data or use anonymized data from `Artemis <https://github.com/ls1intum/Artemis>`_, an open-source LMS.
The Athena Playground is equipped with a set of example data for initial testing. To conduct a more thorough evaluation, users have the option to use their own datasets or request anonymized data from `Artemis <https://github.com/ls1intum/Artemis>`_, an open-source LMS.

Example Data
-------------------------------------------
This data is provided within the `playground/data/example` directory and is automatically utilized when launching the Playground.

Evaluation Data
-------------------------------------------
The `playground/data/evaluation` directory is designated for your custom data used for evaluation purposes. Initially, it's left empty for you to populate.
Example Data
------------

Artemis Evaluation Data
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you're integrating with Artemis LMS and would like to evaluate their data, you can request an anonymized database dump from the Artemis team. This request requires a valid reason and a signed data protection agreement (NDA). For further details, please get in touch with the Artemis team.
Located in ``playground/data/example``, this default dataset is automatically used when the Playground is initiated.

Once the database dump is acquired, follow these steps to export the data to the Playground:

1. **Load the Database Dump:**
Evaluation Data
---------------

.. code-block:: bash
The directory ``playground/data/evaluation`` is reserved for your custom data. It is initially empty, ready to be filled with your evaluation datasets.

npm run export:artemis:1-load-anonymized-database-dump

This command loads the data into your local MySQL database. You can use the same database as Artemis.
Exporting Evaluation Data from Artemis
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

2. **Export the Data:**
To evaluate using data from Artemis, you can request an anonymized database dump, contingent on a valid justification and a signed data protection agreement. Contact the Artemis team for details.

.. code-block:: bash
Steps to Export Evaluation Data from Artemis:
"""""""""""""""""""""""""""""""""""""""""""""

npm run export:artemis:2-export-evaluation-data
1. **Set up a MySQL database:**
Create a new MySQL database and user. You can use the same database instance as Artemis or a separate one; follow the instructions in the `Artemis documentation <https://docs.artemis.cit.tum.de/dev/setup/database.html#mysql-setup>`_ to set it up.

This exports exercises listed under `playground/scripts/artemis/evaluation_data` to the `playground/data/evaluation` directory, where you can use it for evaluation purposes.

Artemis Programming Exercises
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Artemis programming exercises are not included in the anonymized database dump. To access these exercises, you'll need to request them separately from the Artemis team. Once you have the programming exercises, an instructor from the course can export them using the following commands:
2. **Load the Database Dump:**
Use the command below to import the anonymized data into your local MySQL database. You will only need to do this once to populate the database. The script will ask you for the database ``host``, ``port``, ``user``, ``password``, and ``database``. Additionally, you will need to provide the path to the anonymized database dump, e.g. ``/home/user/artemis-database-dump.sql``.

1. **Download the Repositories:**
.. code-block:: bash
.. code-block:: bash
npm run export:artemis:1-load-anonymized-database-dump
npm run export:artemis:3-download-programming-repositories
.. image:: ../images/load-anonymized-database-dump.png
:width: 500px
:alt: Example terminal screenshot of the command to load the anonymized database dump
:align: center

This command exports the programming exercises' materials and submissions to the `playground/data/evaluation` directory. The instructor should then zip these and send them to you.
3. **Export the Data:**
This command exports the data specified in ``playground/scripts/artemis/evaluation_data/text_exercises.json`` to your local ``playground/data/evaluation`` directory.

2. **Link the Repositories:**
.. code-block:: bash
.. code-block:: bash
npm run export:artemis:2-export-evaluation-data
npm run export:artemis:4-link-programming-repositories
This command links the repositories to the `exercise-*.json` files and validates if there are any missing repositories.
Artemis Programming Exercises
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Programming exercises are not part of the anonymized database dump and must be requested separately from the Artemis team. You can find the selected exercises and their participation IDs for export in ``playground/scripts/artemis/evaluation_data/programming_exercises.json``.

Steps for Instructors to Export Programming Exercises:
""""""""""""""""""""""""""""""""""""""""""""""""""""""

4. **Download Repositories:**
Instructors can download materials and submissions from Artemis using the command below, then zip and transfer them to you. Keep in mind that this command will take a long time to run if there are many participations to download.

.. code-block:: bash
npm run export:artemis:3-download-programming-repositories
5. **Link the Repositories:**
Put the downloaded repositories in the ``playground/data/evaluation`` directory and link them to the respective exercises using the following command. This command will also validate if there are any missing repositories. Without this step, the programming repositories will not be available in the Playground.

.. code-block:: bash
npm run export:artemis:4-link-programming-repositories
Generating ``programming_exercises.json``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The SQL script provided below can be adapted to generate a ``programming_exercises.json`` file, located at ``playground/scripts/artemis/evaluation_data/programming_exercises.json``. Similar logic applies to create ``text_exercises.json``. The script collects data on selected exercises, aggregates participation data, and formats it into a JSON structure suitable for export scripts.

**Note:** The provided SQL script is an example and should be tailored to include the specific IDs of the programming exercises you wish to export. You might want to reduce the number of participations to export if you don't need all of them. ``anonymized_artemis`` should be replaced with the name of your database.

.. code-block:: sql
WITH temp_course_exercises AS (
SELECT
DISTINCT e.id,
c.id AS course_id,
0 as is_exam_exercise -- Course exercises
FROM
anonymized_artemis.exercise e
JOIN anonymized_artemis.course c ON e.course_id = c.id
),
temp_exam_exercises AS (
SELECT
DISTINCT e.id,
c.id AS course_id,
1 as is_exam_exercise -- Exam exercises
FROM
anonymized_artemis.course c
JOIN anonymized_artemis.exam ex ON ex.course_id = c.id
JOIN anonymized_artemis.exercise_group eg ON eg.exam_id = ex.id
JOIN anonymized_artemis.exercise e ON e.exercise_group_id = eg.id
),
temp_exercises AS (
SELECT * FROM temp_course_exercises
UNION
SELECT * FROM temp_exam_exercises
)
SELECT JSON_OBJECT(
c.title, JSON_OBJECT(
'course_id', c.id,
'semester', c.semester,
'exercises', JSON_ARRAYAGG(
JSON_OBJECT(
'id', e.id,
'title', e.title,
'is_exam_exercise', te.is_exam_exercise
)
),
'participations', JSON_ARRAYAGG(
(SELECT JSON_ARRAYAGG(p.id)
FROM anonymized_artemis.participation p -- Note: This may also contain participations that are unnecessary
WHERE p.exercise_id = e.id)
)
)
)
FROM temp_exercises te
JOIN anonymized_artemis.exercise e ON te.id = e.id
JOIN anonymized_artemis.course c ON c.id = te.course_id
WHERE e.id IN (2610, 3782, 2111, 2104, 3187, 3781, 6344, 6433, 3942, 3693, 4864, 4896, 3913, 3914, 3908, 3185, 3184) -- Programming exercises to export
GROUP BY c.id, c.title, c.semester;
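For orientation, the query above emits one JSON object per course; a hypothetical, abbreviated illustration of the resulting ``programming_exercises.json`` shape follows (only the key names come from the ``JSON_OBJECT``/``JSON_ARRAYAGG`` calls above; the course title, semester, and all IDs except exercise 2610, which appears in the WHERE clause, are made up):

.. code-block:: python

    # Hypothetical, abbreviated illustration of the aggregated shape.
    {
        "Example Course": {
            "course_id": 1,
            "semester": "WS22/23",
            "exercises": [
                {"id": 2610, "title": "Example Exercise", "is_exam_exercise": 0}
            ],
            "participations": [
                [101, 102, 103]  # participation IDs for exercise 2610
            ]
        }
    }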
