Skip to content

Commit

Permalink
Merge pull request #83 from ls1intum/feature/improve-programming-module
Browse files Browse the repository at this point in the history
General improvements for `module_programming_llm`
  • Loading branch information
FelixTJDietrich authored Oct 20, 2023
2 parents 323d100 + 70b8f40 commit d06e17f
Show file tree
Hide file tree
Showing 27 changed files with 2,258 additions and 605 deletions.
4 changes: 2 additions & 2 deletions athena/athena/helpers/programming/code_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_repository(url: str) -> Repo:
repo_zip.extractall(cache_dir_path)
if not (cache_dir_path / ".git").exists():
repo = Repo.init(cache_dir_path, initial_branch='main')
repo.index.add(repo.untracked_files)
repo.index.commit("Initial commit")
repo.git.add(all=True, force=True)
repo.git.commit('-m', 'Initial commit')

return Repo(cache_dir_path)
2 changes: 1 addition & 1 deletion athena/athena/schemas/programming_exercise.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pydantic import Field, AnyUrl
from zipfile import ZipFile
from git import Repo
from git.repo import Repo

from athena.helpers.programming.code_repository import get_repository_zip, get_repository
from .exercise_type import ExerciseType
Expand Down
35 changes: 29 additions & 6 deletions env_example/module_programming_llm.env
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,33 @@ PRODUCTION=1
SECRET=12345abcdef
DATABASE_URL=postgresql://postgres:password@postgres:5432/athena

OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Can be empty if Azure API is not used
OPENAI_API_TYPE="azure" # change to "azure" if Azure is used
OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed
OPENAI_API_VERSION="2023-03-15-preview" # change version if needed
AZURE_DEPLOYMENT_NAME="gpt-35" # change to deployment name
################################################################
# LLM Credentials #
################################################################

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-35"

# Standard OpenAI (Non-Azure) [leave blank if not used]
# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
# A list of models can be found in `module_programming_llm/helpers/models/openai.py` (openai_models)
LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Azure OpenAI [leave blank if not used]
# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35`
LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed
LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change version if needed

# Replicate [leave blank if not used]
# See https://replicate.com and adjust model config options in `module_programming_llm/helpers/models/replicate.py`
REPLICATE_API_TOKEN=

# LangSmith (can be used for tracing LLMs) [leave blank if not used]
# See https://docs.smith.langchain.com
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"
34 changes: 28 additions & 6 deletions module_programming_llm/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,32 @@ SECRET=12345abcdef
DATABASE_URL=sqlite:///../data/data.sqlite


OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
################################################################
# LLM Credentials #
################################################################

# Can be empty if Azure API is not used
OPENAI_API_TYPE="azure" # change to "azure" if Azure is used
OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed
OPENAI_API_VERSION="2023-03-15-preview" # change version if needed
AZURE_DEPLOYMENT_NAME="gpt-35" # change to deployment name
# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-35"

# Standard OpenAI (Non-Azure) [leave blank if not used]
# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
# A list of models can be found in `module_programming_llm/helpers/models/openai.py` (openai_models)
LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Azure OpenAI [leave blank if not used]
# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35`
LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed
LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change version if needed

# Replicate [leave blank if not used]
# See https://replicate.com and adjust model config options in `module_programming_llm/helpers/models/replicate.py`
REPLICATE_API_TOKEN=

# LangSmith (can be used for tracing LLMs) [leave blank if not used]
# See https://docs.smith.langchain.com
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"
30 changes: 9 additions & 21 deletions module_programming_llm/module_programming_llm/__main__.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,38 @@
from typing import List

import tiktoken

from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider
from athena.storage import store_exercise
from athena.programming import Exercise, Submission, Feedback
from athena.logger import logger
from module_programming_llm.config import Configuration

from module_programming_llm.basic.basic_feedback_provider import suggest_feedback as suggest_feedback_basic
from module_programming_llm.basic.file_instructions import generate_file_grading_instructions, generate_file_problem_statements
from module_programming_llm.generate_suggestions_by_file import generate_suggestions_by_file


@submissions_consumer
def receive_submissions(exercise: Exercise, submissions: List[Submission]):
logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id)

# Split problem statements and grading instructions
exercise.meta['file_grading_instructions'] = generate_file_grading_instructions(exercise)
exercise.meta['file_problem_statements'] = generate_file_problem_statements(exercise)

store_exercise(exercise)


@submission_selector
def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission:
    """Pick which submission should be graded next.

    Trivial strategy: always return the first submission in the list.

    :param exercise: the programming exercise the submissions belong to
    :param submissions: non-empty list of candidate submissions
    :return: the submission selected for grading
    """
    # Fixed log format string: the original had a stray comma after the first
    # %d ("Received %d, submissions"), garbling the message.
    logger.info("select_submission: Received %d submissions for exercise %d", len(submissions), exercise.id)
    # Always return the first submission
    return submissions[0]


@feedback_consumer
def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]):
logger.info("process_feedback: Received feedbacks for submission %d of exercise %d.", submission.id, exercise.id)
logger.info("process_feedback: Feedbacks: %s", feedbacks)
# Do something with the feedback
logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id)


@feedback_provider
async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]:
async def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]:
logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id, exercise.id)
# Do something with the submission and return a list of feedback

# Check if file based grading instructions and problem statements are available
if 'file_grading_instructions' in exercise.meta and 'file_problem_statements' in exercise.meta:
return await suggest_feedback_basic(exercise, submission)
logger.info("suggest_feedback: No file based grading instructions and problem statements available. Skipping feedback generation.")
return []
return await generate_suggestions_by_file(exercise, submission, module_config.approach, module_config.debug)


if __name__ == "__main__":
# Preload for token estimation later
tiktoken.get_encoding("cl100k_base")
app.start()

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit d06e17f

Please sign in to comment.