
Commit

Merge pull request #163 from ls1intum/add-grading-critera-support-llm
Add grading criteria support for llm modules
FelixTJDietrich authored Oct 27, 2023
2 parents 24caa9b + 6f8d009 commit e6749e5
Showing 8 changed files with 102 additions and 14 deletions.
4 changes: 3 additions & 1 deletion athena/athena/__init__.py
@@ -3,7 +3,7 @@
 from pathlib import Path
 
 from .app import app
-from .schemas import ExerciseType
+from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction
 from .metadata import emit_meta, get_meta
 from .experiment import get_experiment_environment
 from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider # type: ignore
@@ -32,5 +32,7 @@ def run_module():
     "get_meta",
     "get_experiment_environment",
     "ExerciseType",
+    "GradingCriterion",
+    "StructuredGradingInstruction",
     "app"
 ]
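With the package root re-exporting these schemas, module code can import them directly. A small usage sketch (illustrative, not part of this commit; it relies only on attributes that appear in the diffs below):

from typing import List

from athena import GradingCriterion

def count_structured_instructions(criteria: List[GradingCriterion]) -> int:
    # Total number of structured grading instructions across all criteria
    return sum(len(criterion.structured_grading_instructions) for criterion in criteria)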
1 change: 1 addition & 0 deletions athena/athena/schemas/__init__.py
@@ -10,3 +10,4 @@
 from .programming_feedback import ProgrammingFeedback
 from .programming_exercise import ProgrammingExercise
 from .programming_submission import ProgrammingSubmission
+from .grading_criterion import GradingCriterion, StructuredGradingInstruction
18 changes: 11 additions & 7 deletions athena/athena/schemas/feedback.py
@@ -3,19 +3,23 @@
 
 from pydantic import Field
 
+from .grading_criterion import StructuredGradingInstruction
 from .schema import Schema
 
 
 class Feedback(Schema, ABC):
     id: Optional[int] = Field(None, example=1)
-    title: Optional[str] = Field(None, description="The title of the feedback that is shown to the student.",
-                                 example="File src/pe1/MergeSort.java at line 12")
-    description: Optional[str] = Field(None, description="The detailed feedback description that is shown to the student.",
-                                       example="Your solution is correct.")
-    credits: float = Field(0.0, description="The number of points that the student received for this feedback.",
+    title: Optional[str] = Field(None,
+                                 description="The title of the feedback that is shown to the student.",
+                                 example="File src/pe1/MergeSort.java at line 12")
+    description: Optional[str] = Field(None,
+                                       description="The detailed feedback description that is shown to the student.",
+                                       example="Your solution is correct.")
+    credits: float = Field(0.0,
+                           description="The number of points that the student received for this feedback.",
                            example=1.0)
-    structured_grading_instruction_id: Optional[int] = Field(None, description="The id of the structured grading instruction that this feedback belongs to.", example=1)
+    structured_grading_instruction_id: Optional[int] = Field(None,
+                                                             description="The id of the structured grading instruction that this feedback belongs to.",
+                                                             example=1)
 
     meta: dict = Field({}, example={})
 
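Concrete feedback schemas (e.g. ProgrammingFeedback above) inherit this new optional field, so a serialized feedback now carries the link to its structured grading instruction. A schematic payload, with values invented purely for illustration:

feedback_payload = {
    "title": "Missing edge case",
    "description": "Empty input is not handled.",
    "credits": -1.0,
    "structured_grading_instruction_id": 42,  # ties the feedback to one structured grading instruction
    "meta": {},
}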
4 changes: 4 additions & 0 deletions module_programming_llm/module_programming_llm/generate_suggestions_by_file.py
@@ -29,6 +29,9 @@ class FeedbackModel(BaseModel):
     line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
     line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
     credits: float = Field(0.0, description="Number of points received/deducted")
+    grading_instruction_id: Optional[int] = Field(
+        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
+    )
 
     class Config:
         title = "Feedback"
@@ -239,6 +242,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submission
             line_start=feedback.line_start,
             line_end=feedback.line_end,
             credits=feedback.credits,
+            structured_grading_instruction_id=feedback.grading_instruction_id,
             meta={}
         ))
 
35 changes: 35 additions & 0 deletions module_programming_llm/module_programming_llm/helpers/utils.py
@@ -8,6 +8,7 @@
 from git.repo import Repo
 from langchain.document_loaders import GitLoader
 
+from athena import GradingCriterion
 
 def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, str]:
     return {
@@ -24,6 +25,40 @@ def merge_repos_by_filepath(*repos: Repo, file_filter: Optional[Callable[[str], bool]] = None):
         yield (file, [doc.get(file) for doc in docs])
 
 
+def format_grading_instructions(grading_instructions: Optional[str], grading_criteria: Optional[List[GradingCriterion]]) -> Optional[str]:
+    """Formats grading instructions and the grading criteria with nested structured grading instructions into a single string.
+
+    Args:
+        grading_instructions (Optional[str]): Grading instructions
+        grading_criteria (Optional[List[GradingCriterion]]): Grading criteria with nested structured grading instructions
+
+    Returns:
+        Optional[str]: Formatted grading instructions or None if no grading instructions or grading criteria are provided
+    """
+
+    if not grading_instructions and not grading_criteria:
+        return None
+
+    result = ""
+    if grading_instructions:
+        result += grading_instructions + "\n\n"
+
+    if grading_criteria:
+        for grading_criterion in grading_criteria:
+            result += f'Criterion > "{(grading_criterion.title or "Unnamed criterion")}":\n'
+            for grading_instruction in grading_criterion.structured_grading_instructions:
+                result += f' - grading_instruction_id={grading_instruction.id} > "{grading_instruction.feedback}": ('
+                if grading_instruction.usage_count > 0:
+                    result += f'can be used {grading_instruction.usage_count} times in total'
+                else:
+                    result += "can be used unlimited times"
+                result += f', gives {grading_instruction.credits} credits for "{grading_instruction.grading_scale}" grading scale, '
+                result += f'usage description: "{grading_instruction.instruction_description}")\n'
+            result += "\n"
+
+    return result.strip()
+
+
 def add_line_numbers(content: str) -> str:
     lines = content.splitlines()
     line_number_max_length = len(str(len(lines)))
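To make the produced prompt text concrete, here is a hedged example of calling the new helper. The constructor arguments are invented for illustration; only the attributes read by the function above are taken from this diff:

from athena import GradingCriterion, StructuredGradingInstruction
from module_programming_llm.helpers.utils import format_grading_instructions

criterion = GradingCriterion(
    id=1,
    title="Correctness",
    structured_grading_instructions=[
        StructuredGradingInstruction(
            id=101,
            credits=1.0,
            grading_scale="Good",
            instruction_description="Award if all edge cases are handled.",
            feedback="Handles edge cases",
            usage_count=1,
        ),
    ],
)

print(format_grading_instructions("Grade carefully.", [criterion]))
# Grade carefully.
#
# Criterion > "Correctness":
#  - grading_instruction_id=101 > "Handles edge cases": (can be used 1 times in total, gives 1.0 credits for "Good" grading scale, usage description: "Award if all edge cases are handled.")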
10 changes: 6 additions & 4 deletions module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py
@@ -14,7 +14,7 @@
     num_tokens_from_prompt,
     predict_and_parse
 )
-from module_programming_llm.helpers.utils import get_diff
+from module_programming_llm.helpers.utils import format_grading_instructions, get_diff
 
 
 class FileGradingInstruction(BaseModel):
@@ -47,9 +47,11 @@ async def split_grading_instructions_by_file(
         Optional[SplitGradingInstructions]: Split grading instructions, None if it is too short or too long
     """
 
+    grading_instructions = format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria)
+
     # Return None if the grading instructions are too short
-    if (exercise.grading_instructions is None
-            or num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split):
+    if (grading_instructions is None
+            or num_tokens_from_string(grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split):
         return None
 
     # Return None if the grading instructions are not in the prompt
@@ -84,7 +86,7 @@ async def split_grading_instructions_by_file(
     )
 
     prompt_input = {
-        "grading_instructions": exercise.grading_instructions,
+        "grading_instructions": grading_instructions,
         "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution),
         "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission)
    }
8 changes: 6 additions & 2 deletions module_text_llm/module_text_llm/generate_suggestions.py
@@ -12,14 +12,17 @@
     num_tokens_from_prompt,
     predict_and_parse
 )
-from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range
+from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
 
 class FeedbackModel(BaseModel):
     title: str = Field(description="Very short title, i.e. feedback category", example="Logic Error")
     description: str = Field(description="Feedback description")
     line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
     line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
     credits: float = Field(0.0, description="Number of points received/deducted")
+    grading_instruction_id: Optional[int] = Field(
+        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
+    )
 
     class Config:
         title = "Feedback"
@@ -40,7 +43,7 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, config
     prompt_input = {
         "max_points": exercise.max_points,
         "bonus_points": exercise.bonus_points,
-        "grading_instructions": exercise.grading_instructions,
+        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
         "problem_statement": exercise.problem_statement or "No problem statement.",
         "example_solution": exercise.example_solution,
         "submission": add_sentence_numbers(submission.text)
@@ -102,6 +105,7 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, config
             index_start=index_start,
             index_end=index_end,
             credits=feedback.credits,
+            structured_grading_instruction_id=feedback.grading_instruction_id,
             meta={}
         ))
 
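Because the model only learns about grading_instruction_id values from the prompt, it can still emit an id that does not exist. A defensive sketch of module-side hardening (an assumption on my part, not part of this change) that drops unknown ids before they reach the Feedback constructed above:

from typing import Optional

def validated_instruction_id(exercise, suggested_id: Optional[int]) -> Optional[int]:
    # Keep the id only if it belongs to a structured grading instruction of this exercise
    valid_ids = {
        instruction.id
        for criterion in (exercise.grading_criteria or [])
        for instruction in criterion.structured_grading_instructions
    }
    return suggested_id if suggested_id in valid_ids else None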
36 changes: 36 additions & 0 deletions module_text_llm/module_text_llm/helpers/utils.py
@@ -3,6 +3,8 @@
 import tiktoken
 from nltk.tokenize import sent_tokenize
 
+from athena import GradingCriterion
+
 # This is correct for gpt-4 and chat gpt3.5 but might be different for other models
 def num_tokens_from_string(string: str) -> int:
     """Returns the number of tokens in a text string."""
@@ -11,6 +13,40 @@ def num_tokens_from_string(string: str) -> int:
     return num_tokens
 
 
+def format_grading_instructions(grading_instructions: Optional[str], grading_criteria: Optional[List[GradingCriterion]]) -> Optional[str]:
+    """Formats grading instructions and the grading criteria with nested structured grading instructions into a single string.
+
+    Args:
+        grading_instructions (Optional[str]): Grading instructions
+        grading_criteria (Optional[List[GradingCriterion]]): Grading criteria with nested structured grading instructions
+
+    Returns:
+        Optional[str]: Formatted grading instructions or None if no grading instructions or grading criteria are provided
+    """
+
+    if not grading_instructions and not grading_criteria:
+        return None
+
+    result = ""
+    if grading_instructions:
+        result += grading_instructions + "\n\n"
+
+    if grading_criteria:
+        for grading_criterion in grading_criteria:
+            result += f'Criterion > "{(grading_criterion.title or "Unnamed criterion")}":\n'
+            for grading_instruction in grading_criterion.structured_grading_instructions:
+                result += f' - grading_instruction_id={grading_instruction.id} > "{grading_instruction.feedback}": ('
+                if grading_instruction.usage_count > 0:
+                    result += f'can be used {grading_instruction.usage_count} times in total'
+                else:
+                    result += "can be used unlimited times"
+                result += f', gives {grading_instruction.credits} credits for "{grading_instruction.grading_scale}" grading scale, '
+                result += f'usage description: "{grading_instruction.instruction_description}")\n'
+            result += "\n"
+
+    return result.strip()
+
+
 def add_sentence_numbers(content: str) -> str:
     sentences = sent_tokenize(content)
     sentences = [line for sentence in sentences for line in sentence.split("\n")]
