From 15aae1dc3ccdc4bde38a81ff9b1da0328402d31c Mon Sep 17 00:00:00 2001
From: "Felix T.J. Dietrich"
Date: Fri, 18 Aug 2023 22:27:17 +0200
Subject: [PATCH 01/51] update dependencies

---
 module_programming_llm/poetry.lock    | 686 +++++++++++++++-----------
 module_programming_llm/pyproject.toml |   7 +-
 2 files changed, 394 insertions(+), 299 deletions(-)

diff --git a/module_programming_llm/poetry.lock b/module_programming_llm/poetry.lock
index d0015f73a..c0f838c48 100644
--- a/module_programming_llm/poetry.lock
+++ b/module_programming_llm/poetry.lock
@@ -1,9 +1,10 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
 version = "3.8.5"
 description = "Async http client/server framework (asyncio)"
+category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -112,6 +113,7 @@ speedups = ["Brotli", "aiodns", "cchardet"]
 name = "aiosignal"
 version = "1.3.1"
 description = "aiosignal: a list of registered asynchronous callbacks"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -126,6 +128,7 @@ frozenlist = ">=1.1.0"
 name = "anyio"
 version = "3.7.1"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -145,23 +148,25 @@ trio = ["trio (<0.22)"]
 
 [[package]]
 name = "async-timeout"
-version = "4.0.2"
+version = "4.0.3"
 description = "Timeout context manager for asyncio programs"
+category = "main"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"},
-    {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"},
+    {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
+    {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
 ]
 
 [[package]]
 name = "athena"
 version = "1.0.0"
 description = "This is a helper module for easier development of Athena modules. It provides communication functionality with the Assessment Module manager, as well as helper functions for storage."
+category = "main"
 optional = false
 python-versions = "^3.10"
 files = []
-develop = false
+develop = true
 
 [package.dependencies]
 fastapi = "^0.96.0"
@@ -169,7 +174,7 @@ gitpython = "^3.1.31"
 httpx = "^0.24.1"
 psycopg2 = "^2.9.6"
 sqlalchemy = {version = "^2.0.15", extras = ["mypy"]}
-uvicorn = "^0.22.0"
+uvicorn = "^0.23.0"
 
 [package.source]
 type = "directory"
@@ -179,6 +184,7 @@ url = "../athena"
 name = "attrs"
 version = "23.1.0"
 description = "Classes Without Boilerplate"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -197,6 +203,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte
 name = "certifi"
 version = "2023.7.22"
 description = "Python package for providing Mozilla's CA Bundle."
+category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -208,6 +215,7 @@ files = [
 name = "charset-normalizer"
 version = "3.2.0"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -290,13 +298,14 @@ files = [ [[package]] name = "click" -version = "8.1.6" +version = "8.1.7" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"}, - {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"}, + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, ] [package.dependencies] @@ -306,6 +315,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -317,6 +327,7 @@ files = [ name = "dataclasses-json" version = "0.5.9" description = "Easily serialize dataclasses to and from JSON" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -334,13 +345,14 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( [[package]] name = "exceptiongroup" -version = "1.1.2" +version = "1.1.3" description = "Backport of PEP 654 (exception groups)" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, - {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, ] [package.extras] @@ -350,6 +362,7 @@ test = ["pytest (>=6)"] name = "fastapi" version = "0.96.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -371,6 +384,7 @@ test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6 name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -441,6 +455,7 @@ files = [ name = "gitdb" version = "4.0.10" description = "Git Object Database" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -455,6 +470,7 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.32" description = "GitPython is a Python library used to interact with Git repositories" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -469,6 +485,7 @@ gitdb = ">=4.0.1,<5" name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -542,6 +559,7 @@ test = ["objgraph", "psutil"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" 
optional = false python-versions = ">=3.7" files = [ @@ -553,6 +571,7 @@ files = [ name = "httpcore" version = "0.17.3" description = "A minimal low-level HTTP client." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -564,16 +583,17 @@ files = [ anyio = ">=3.0,<5.0" certifi = "*" h11 = ">=0.13,<0.15" -sniffio = "==1.*" +sniffio = ">=1.0.0,<2.0.0" [package.extras] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "httpx" version = "0.24.1" description = "The next generation HTTP client." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -589,14 +609,15 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -604,65 +625,79 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "joblib" +version = "1.3.2" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, + {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, +] + [[package]] name = "langchain" -version = "0.0.225" +version = "0.0.267" description = "Building applications with LLMs through composability" +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchain-0.0.225-py3-none-any.whl", hash = "sha256:a72132ceb22f9a9fcad944778fd351d33b63e27a9c03f61d17e4e52fad37fe47"}, - {file = "langchain-0.0.225.tar.gz", hash = "sha256:986e0838faef3c344d8c00b0079814ca357a39b17f7642e06beb075d3c59c770"}, + {file = "langchain-0.0.267-py3-none-any.whl", hash = "sha256:191ab96aa6f633ecf850e944b68782e7bc237495bd91132e5ff6f9749f452f97"}, + {file = "langchain-0.0.267.tar.gz", hash = "sha256:61ee406332d9f87b71d662883f99677d39c37c6e8dbabd1c0b88335c0df43043"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">=0.5.7,<0.6.0" -langchainplus-sdk = ">=0.0.20,<0.0.21" +langsmith = ">=0.0.21,<0.1.0" numexpr = ">=2.8.4,<3.0.0" numpy = ">=1,<2" openapi-schema-pydantic = ">=1.2,<2.0" -pydantic = ">=1,<2" -PyYAML = ">=5.4.1" +pydantic = ">=1,<3" +PyYAML = ">=5.3" requests = ">=2,<3" SQLAlchemy = ">=1.4,<3" tenacity = ">=8.1.0,<9.0.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.3,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (==9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", "deeplake (>=3.6.2,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search 
(>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.1.dev3,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.9.1,<0.10.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] -azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0a20230509004)", "openai (>=0,<1)"] -clarifai = ["clarifai (==9.1.0)"] -cohere = ["cohere (>=3,<4)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", 
"psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"] +clarifai = ["clarifai (>=9.1.0)"] +cohere = ["cohere (>=4,<5)"] docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "zep-python (>=0.32)"] +extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] javascript = ["esprima (>=4.0.1,<5.0.0)"] -llms = ["anthropic (>=0.3,<0.4)", "clarifai (==9.1.0)", "cohere (>=3,<4)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers 
(>=4,<5)"] openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"] -qdrant = ["qdrant-client (>=1.1.2,<2.0.0)"] +qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] text-helpers = ["chardet (>=5.1.0,<6.0.0)"] [[package]] -name = "langchainplus-sdk" -version = "0.0.20" +name = "langsmith" +version = "0.0.24" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchainplus_sdk-0.0.20-py3-none-any.whl", hash = "sha256:07a869d476755803aa04c4986ce78d00c2fe4ff584c0eaa57d7570c9664188db"}, - {file = "langchainplus_sdk-0.0.20.tar.gz", hash = "sha256:3d300e2e3290f68cc9d842c059f9458deba60e776c9e790309688cad1bfbb219"}, + {file = "langsmith-0.0.24-py3-none-any.whl", hash = "sha256:f9f951d070aa1919123d700642aca9c781edfc8797a65ab1161aa12f89bed707"}, + {file = "langsmith-0.0.24.tar.gz", hash = "sha256:9c066dd915752324490a735692997b0db0958f5dfc1e0a0dfbf752c6e62c7529"}, ] [package.dependencies] -pydantic = ">=1,<2" +pydantic = ">=1,<3" requests = ">=2,<3" -tenacity = ">=8.1.0,<9.0.0" [[package]] name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -683,6 +718,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "marshmallow-enum" version = "1.5.1" description = "Enum field for Marshmallow" +category = "main" optional = false python-versions = "*" files = [ @@ -697,6 +733,7 @@ marshmallow = ">=2.0.0" name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -778,37 +815,39 @@ files = [ [[package]] name = "mypy" -version = "1.4.1" +version = "1.5.1" description = "Optional static typing for Python" +category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "mypy-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:566e72b0cd6598503e48ea610e0052d1b8168e60a46e0bfd34b3acf2d57f96a8"}, - {file = "mypy-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca637024ca67ab24a7fd6f65d280572c3794665eaf5edcc7e90a866544076878"}, - {file = "mypy-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dde1d180cd84f0624c5dcaaa89c89775550a675aff96b5848de78fb11adabcd"}, - {file = "mypy-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c4d8e89aa7de683e2056a581ce63c46a0c41e31bd2b6d34144e2c80f5ea53dc"}, - {file = "mypy-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:bfdca17c36ae01a21274a3c387a63aa1aafe72bff976522886869ef131b937f1"}, - {file = "mypy-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7549fbf655e5825d787bbc9ecf6028731973f78088fbca3a1f4145c39ef09462"}, - {file = "mypy-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98324ec3ecf12296e6422939e54763faedbfcc502ea4a4c38502082711867258"}, - {file = "mypy-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141dedfdbfe8a04142881ff30ce6e6653c9685b354876b12e4fe6c78598b45e2"}, - {file = "mypy-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8207b7105829eca6f3d774f64a904190bb2231de91b8b186d21ffd98005f14a7"}, - {file = "mypy-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:16f0db5b641ba159eff72cff08edc3875f2b62b2fa2bc24f68c1e7a4e8232d01"}, - {file = "mypy-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:470c969bb3f9a9efcedbadcd19a74ffb34a25f8e6b0e02dae7c0e71f8372f97b"}, - {file = "mypy-1.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5952d2d18b79f7dc25e62e014fe5a23eb1a3d2bc66318df8988a01b1a037c5b"}, - {file = "mypy-1.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:190b6bab0302cec4e9e6767d3eb66085aef2a1cc98fe04936d8a42ed2ba77bb7"}, - {file = "mypy-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9d40652cc4fe33871ad3338581dca3297ff5f2213d0df345bcfbde5162abf0c9"}, - {file = "mypy-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01fd2e9f85622d981fd9063bfaef1aed6e336eaacca00892cd2d82801ab7c042"}, - {file = "mypy-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2460a58faeea905aeb1b9b36f5065f2dc9a9c6e4c992a6499a2360c6c74ceca3"}, - {file = "mypy-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2746d69a8196698146a3dbe29104f9eb6a2a4d8a27878d92169a6c0b74435b6"}, - {file = "mypy-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ae704dcfaa180ff7c4cfbad23e74321a2b774f92ca77fd94ce1049175a21c97f"}, - {file = "mypy-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:43d24f6437925ce50139a310a64b2ab048cb2d3694c84c71c3f2a1626d8101dc"}, - {file = "mypy-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c482e1246726616088532b5e964e39765b6d1520791348e6c9dc3af25b233828"}, - {file = "mypy-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43b592511672017f5b1a483527fd2684347fdffc041c9ef53428c8dc530f79a3"}, - {file = "mypy-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34a9239d5b3502c17f07fd7c0b2ae6b7dd7d7f6af35fbb5072c6208e76295816"}, - {file = "mypy-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5703097c4936bbb9e9bce41478c8d08edd2865e177dc4c52be759f81ee4dd26c"}, - {file = "mypy-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e02d700ec8d9b1859790c0475df4e4092c7bf3272a4fd2c9f33d87fac4427b8f"}, - {file = "mypy-1.4.1-py3-none-any.whl", hash = "sha256:45d32cec14e7b97af848bddd97d85ea4f0db4d5a149ed9676caa4eb2f7402bb4"}, - {file = "mypy-1.4.1.tar.gz", hash = "sha256:9bbcd9ab8ea1f2e1c8031c21445b511442cc45c89951e49bbf852cbb70755b1b"}, + {file = "mypy-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f33592ddf9655a4894aef22d134de7393e95fcbdc2d15c1ab65828eee5c66c70"}, + {file = "mypy-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:258b22210a4a258ccd077426c7a181d789d1121aca6db73a83f79372f5569ae0"}, + {file = "mypy-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ec1f695f0c25986e6f7f8778e5ce61659063268836a38c951200c57479cc12"}, + {file = "mypy-1.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:abed92d9c8f08643c7d831300b739562b0a6c9fcb028d211134fc9ab20ccad5d"}, + {file = "mypy-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:a156e6390944c265eb56afa67c74c0636f10283429171018446b732f1a05af25"}, + {file = "mypy-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ac9c21bfe7bc9f7f1b6fae441746e6a106e48fc9de530dea29e8cd37a2c0cc4"}, + {file = "mypy-1.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51cb1323064b1099e177098cb939eab2da42fea5d818d40113957ec954fc85f4"}, + {file = "mypy-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:596fae69f2bfcb7305808c75c00f81fe2829b6236eadda536f00610ac5ec2243"}, + {file = "mypy-1.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:32cb59609b0534f0bd67faebb6e022fe534bdb0e2ecab4290d683d248be1b275"}, + {file = "mypy-1.5.1-cp311-cp311-win_amd64.whl", 
hash = "sha256:159aa9acb16086b79bbb0016145034a1a05360626046a929f84579ce1666b315"}, + {file = "mypy-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f6b0e77db9ff4fda74de7df13f30016a0a663928d669c9f2c057048ba44f09bb"}, + {file = "mypy-1.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26f71b535dfc158a71264e6dc805a9f8d2e60b67215ca0bfa26e2e1aa4d4d373"}, + {file = "mypy-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc3a600f749b1008cc75e02b6fb3d4db8dbcca2d733030fe7a3b3502902f161"}, + {file = "mypy-1.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:26fb32e4d4afa205b24bf645eddfbb36a1e17e995c5c99d6d00edb24b693406a"}, + {file = "mypy-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:82cb6193de9bbb3844bab4c7cf80e6227d5225cc7625b068a06d005d861ad5f1"}, + {file = "mypy-1.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4a465ea2ca12804d5b34bb056be3a29dc47aea5973b892d0417c6a10a40b2d65"}, + {file = "mypy-1.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9fece120dbb041771a63eb95e4896791386fe287fefb2837258925b8326d6160"}, + {file = "mypy-1.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d28ddc3e3dfeab553e743e532fb95b4e6afad51d4706dd22f28e1e5e664828d2"}, + {file = "mypy-1.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:57b10c56016adce71fba6bc6e9fd45d8083f74361f629390c556738565af8eeb"}, + {file = "mypy-1.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff0cedc84184115202475bbb46dd99f8dcb87fe24d5d0ddfc0fe6b8575c88d2f"}, + {file = "mypy-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8f772942d372c8cbac575be99f9cc9d9fb3bd95c8bc2de6c01411e2c84ebca8a"}, + {file = "mypy-1.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d627124700b92b6bbaa99f27cbe615c8ea7b3402960f6372ea7d65faf376c14"}, + {file = "mypy-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:361da43c4f5a96173220eb53340ace68cda81845cd88218f8862dfb0adc8cddb"}, + {file = "mypy-1.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:330857f9507c24de5c5724235e66858f8364a0693894342485e543f5b07c8693"}, + {file = "mypy-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:c543214ffdd422623e9fedd0869166c2f16affe4ba37463975043ef7d2ea8770"}, + {file = "mypy-1.5.1-py3-none-any.whl", hash = "sha256:f757063a83970d67c444f6e01d9550a7402322af3557ce7630d3c957386fa8f5"}, + {file = "mypy-1.5.1.tar.gz", hash = "sha256:b031b9601f1060bf1281feab89697324726ba0c0bae9d7cd7ab4b690940f0b92"}, ] [package.dependencies] @@ -819,13 +858,13 @@ typing-extensions = ">=4.1.0" [package.extras] dmypy = ["psutil (>=4.0)"] install-types = ["pip"] -python2 = ["typed-ast (>=1.4.0,<2)"] reports = ["lxml"] [[package]] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -833,43 +872,70 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + [[package]] name = "numexpr" -version = "2.8.4" +version = "2.8.5" description = "Fast numerical expression evaluator for NumPy" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "numexpr-2.8.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a75967d46b6bd56455dd32da6285e5ffabe155d0ee61eef685bbfb8dafb2e484"}, - {file = "numexpr-2.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db93cf1842f068247de631bfc8af20118bf1f9447cd929b531595a5e0efc9346"}, - {file = "numexpr-2.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bca95f4473b444428061d4cda8e59ac564dc7dc6a1dea3015af9805c6bc2946"}, - {file = "numexpr-2.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e34931089a6bafc77aaae21f37ad6594b98aa1085bb8b45d5b3cd038c3c17d9"}, - {file = "numexpr-2.8.4-cp310-cp310-win32.whl", hash = "sha256:f3a920bfac2645017110b87ddbe364c9c7a742870a4d2f6120b8786c25dc6db3"}, - {file = "numexpr-2.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:6931b1e9d4f629f43c14b21d44f3f77997298bea43790cfcdb4dd98804f90783"}, - {file = "numexpr-2.8.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9400781553541f414f82eac056f2b4c965373650df9694286b9bd7e8d413f8d8"}, - {file = "numexpr-2.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6ee9db7598dd4001138b482342b96d78110dd77cefc051ec75af3295604dde6a"}, - {file = "numexpr-2.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff5835e8af9a212e8480003d731aad1727aaea909926fd009e8ae6a1cba7f141"}, - {file = "numexpr-2.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:655d84eb09adfee3c09ecf4a89a512225da153fdb7de13c447404b7d0523a9a7"}, - {file = "numexpr-2.8.4-cp311-cp311-win32.whl", hash = "sha256:5538b30199bfc68886d2be18fcef3abd11d9271767a7a69ff3688defe782800a"}, - {file = "numexpr-2.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:3f039321d1c17962c33079987b675fb251b273dbec0f51aac0934e932446ccc3"}, - {file = "numexpr-2.8.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c867cc36cf815a3ec9122029874e00d8fbcef65035c4a5901e9b120dd5d626a2"}, - {file = "numexpr-2.8.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:059546e8f6283ccdb47c683101a890844f667fa6d56258d48ae2ecf1b3875957"}, - {file = "numexpr-2.8.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:845a6aa0ed3e2a53239b89c1ebfa8cf052d3cc6e053c72805e8153300078c0b1"}, - {file = "numexpr-2.8.4-cp37-cp37m-win32.whl", hash = 
"sha256:a38664e699526cb1687aefd9069e2b5b9387da7feac4545de446141f1ef86f46"}, - {file = "numexpr-2.8.4-cp37-cp37m-win_amd64.whl", hash = "sha256:eaec59e9bf70ff05615c34a8b8d6c7bd042bd9f55465d7b495ea5436f45319d0"}, - {file = "numexpr-2.8.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b318541bf3d8326682ebada087ba0050549a16d8b3fa260dd2585d73a83d20a7"}, - {file = "numexpr-2.8.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b076db98ca65eeaf9bd224576e3ac84c05e451c0bd85b13664b7e5f7b62e2c70"}, - {file = "numexpr-2.8.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90f12cc851240f7911a47c91aaf223dba753e98e46dff3017282e633602e76a7"}, - {file = "numexpr-2.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c368aa35ae9b18840e78b05f929d3a7b3abccdba9630a878c7db74ca2368339"}, - {file = "numexpr-2.8.4-cp38-cp38-win32.whl", hash = "sha256:b96334fc1748e9ec4f93d5fadb1044089d73fb08208fdb8382ed77c893f0be01"}, - {file = "numexpr-2.8.4-cp38-cp38-win_amd64.whl", hash = "sha256:a6d2d7740ae83ba5f3531e83afc4b626daa71df1ef903970947903345c37bd03"}, - {file = "numexpr-2.8.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:77898fdf3da6bb96aa8a4759a8231d763a75d848b2f2e5c5279dad0b243c8dfe"}, - {file = "numexpr-2.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:df35324666b693f13a016bc7957de7cc4d8801b746b81060b671bf78a52b9037"}, - {file = "numexpr-2.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ac9cfe6d0078c5fc06ba1c1bbd20b8783f28c6f475bbabd3cad53683075cab"}, - {file = "numexpr-2.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df3a1f6b24214a1ab826e9c1c99edf1686c8e307547a9aef33910d586f626d01"}, - {file = "numexpr-2.8.4-cp39-cp39-win32.whl", hash = "sha256:7d71add384adc9119568d7e9ffa8a35b195decae81e0abf54a2b7779852f0637"}, - {file = "numexpr-2.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:9f096d707290a6a00b6ffdaf581ee37331109fb7b6c8744e9ded7c779a48e517"}, - {file = "numexpr-2.8.4.tar.gz", hash = "sha256:d5432537418d18691b9115d615d6daa17ee8275baef3edf1afbbf8bc69806147"}, + {file = "numexpr-2.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51f3ab160c3847ebcca93cd88f935a7802b54a01ab63fe93152994a64d7a6cf2"}, + {file = "numexpr-2.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:de29c77f674e4eb8f0846525a475cab64008c227c8bc4ba5153ab3f72441cc63"}, + {file = "numexpr-2.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf85ba1327eb87ec82ae7936f13c8850fb969a0ca34f3ba9fa3897c09d5c80d7"}, + {file = "numexpr-2.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c00be69f747f44a631830215cab482f0f77f75af2925695adff57c1cc0f9a68"}, + {file = "numexpr-2.8.5-cp310-cp310-win32.whl", hash = "sha256:c46350dcdb93e32f033eea5a21269514ffcaf501d9abd6036992d37e48a308b0"}, + {file = "numexpr-2.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:894b027438b8ec88dea32a19193716c79f4ff8ddb92302dcc9731b51ba3565a8"}, + {file = "numexpr-2.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6df184d40d4cf9f21c71f429962f39332f7398147762588c9f3a5c77065d0c06"}, + {file = "numexpr-2.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:178b85ad373c6903e55d75787d61b92380439b70d94b001cb055a501b0821335"}, + {file = "numexpr-2.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:578fe4008e4d5d6ff01bbeb2d7b7ba1ec658a5cda9c720cd26a9a8325f8ef438"}, + {file = "numexpr-2.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ef621b4ee366a5c6a484f6678c9259f5b826569f8bfa0b89ba2306d5055468bb"}, + {file = "numexpr-2.8.5-cp311-cp311-win32.whl", hash = "sha256:dd57ab1a3d3aaa9274aff1cefbf93b8ddacc7973afef5b125905f6bf18fabab0"}, + {file = "numexpr-2.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:783324ba40eb804ecfc9ebae86120a1e339ab112d0ab8a1f0d48a26354d5bf9b"}, + {file = "numexpr-2.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:183d5430db76826e54465c69db93a3c6ecbf03cda5aa1bb96eaad0147e9b68dc"}, + {file = "numexpr-2.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39ce106f92ccea5b07b1d6f2f3c4370f05edf27691dc720a63903484a2137e48"}, + {file = "numexpr-2.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b594dc9e2d6291a0bc5c065e6d9caf3eee743b5663897832e9b17753c002947a"}, + {file = "numexpr-2.8.5-cp37-cp37m-win32.whl", hash = "sha256:62b4faf8e0627673b0210a837792bddd23050ecebc98069ab23eb0633ff1ef5f"}, + {file = "numexpr-2.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:db5c65417d69414f1ab31302ea01d3548303ef31209c38b4849d145be4e1d1ba"}, + {file = "numexpr-2.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eb36ffcfa1606e41aa08d559b4277bcad0e16b83941d1a4fee8d2bd5a34f8e0e"}, + {file = "numexpr-2.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:34af2a0e857d02a4bc5758bc037a777d50dacb13bcd57c7905268a3e44994ed6"}, + {file = "numexpr-2.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a8dad2bfaad5a5c34a2e8bbf62b9df1dfab266d345fda1feb20ff4e264b347a"}, + {file = "numexpr-2.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b93f5a866cd13a808bc3d3a9c487d94cd02eec408b275ff0aa150f2e8e5191f8"}, + {file = "numexpr-2.8.5-cp38-cp38-win32.whl", hash = "sha256:558390fea6370003ac749ed9d0f38d708aa096f5dcb707ddb6e0ca5a0dd37da1"}, + {file = "numexpr-2.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:55983806815035eb63c5039520688c49536bb7f3cc3fc1d7d64c6a00cf3f353e"}, + {file = "numexpr-2.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1510da20e6f5f45333610b1ded44c566e2690c6c437c84f2a212ca09627c7e01"}, + {file = "numexpr-2.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e8b5bf7bcb4e8dcd66522d8fc96e1db7278f901cb4fd2e155efbe62a41dde08"}, + {file = "numexpr-2.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ed0e1c1ef5f34381448539f1fe9015906d21c9cfa2797c06194d4207dadb465"}, + {file = "numexpr-2.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aea6ab45c87c0a7041183c08a798f0ad4d7c5eccbce20cfe79ce6f1a45ef3702"}, + {file = "numexpr-2.8.5-cp39-cp39-win32.whl", hash = "sha256:cbfd833ee5fdb0efb862e152aee7e6ccea9c596d5c11d22604c2e6307bff7cad"}, + {file = "numexpr-2.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:283ce8609a7ccbadf91a68f3484558b3e36d27c93c98a41ec205efb0ab43c872"}, + {file = "numexpr-2.8.5.tar.gz", hash = "sha256:45ed41e55a0abcecf3d711481e12a5fb7a904fe99d42bc282a17cc5f8ea510be"}, ] [package.dependencies] @@ -877,42 +943,44 @@ numpy = ">=1.13.3" [[package]] name = "numpy" -version = "1.25.1" +version = "1.25.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d339465dff3eb33c701430bcb9c325b60354698340229e1dff97745e6b3efa"}, - {file = "numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d736b75c3f2cb96843a5c7f8d8ccc414768d34b0a75f466c05f3a739b406f10b"}, - {file = 
"numpy-1.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a90725800caeaa160732d6b31f3f843ebd45d6b5f3eec9e8cc287e30f2805bf"}, - {file = "numpy-1.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c6c9261d21e617c6dc5eacba35cb68ec36bb72adcff0dee63f8fbc899362588"}, - {file = "numpy-1.25.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0def91f8af6ec4bb94c370e38c575855bf1d0be8a8fbfba42ef9c073faf2cf19"}, - {file = "numpy-1.25.1-cp310-cp310-win32.whl", hash = "sha256:fd67b306320dcadea700a8f79b9e671e607f8696e98ec255915c0c6d6b818503"}, - {file = "numpy-1.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:c1516db588987450b85595586605742879e50dcce923e8973f79529651545b57"}, - {file = "numpy-1.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6b82655dd8efeea69dbf85d00fca40013d7f503212bc5259056244961268b66e"}, - {file = "numpy-1.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e8f6049c4878cb16960fbbfb22105e49d13d752d4d8371b55110941fb3b17800"}, - {file = "numpy-1.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41a56b70e8139884eccb2f733c2f7378af06c82304959e174f8e7370af112e09"}, - {file = "numpy-1.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5154b1a25ec796b1aee12ac1b22f414f94752c5f94832f14d8d6c9ac40bcca6"}, - {file = "numpy-1.25.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38eb6548bb91c421261b4805dc44def9ca1a6eef6444ce35ad1669c0f1a3fc5d"}, - {file = "numpy-1.25.1-cp311-cp311-win32.whl", hash = "sha256:791f409064d0a69dd20579345d852c59822c6aa087f23b07b1b4e28ff5880fcb"}, - {file = "numpy-1.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:c40571fe966393b212689aa17e32ed905924120737194b5d5c1b20b9ed0fb171"}, - {file = "numpy-1.25.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3d7abcdd85aea3e6cdddb59af2350c7ab1ed764397f8eec97a038ad244d2d105"}, - {file = "numpy-1.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a180429394f81c7933634ae49b37b472d343cccb5bb0c4a575ac8bbc433722f"}, - {file = "numpy-1.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d412c1697c3853c6fc3cb9751b4915859c7afe6a277c2bf00acf287d56c4e625"}, - {file = "numpy-1.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20e1266411120a4f16fad8efa8e0454d21d00b8c7cee5b5ccad7565d95eb42dd"}, - {file = "numpy-1.25.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f76aebc3358ade9eacf9bc2bb8ae589863a4f911611694103af05346637df1b7"}, - {file = "numpy-1.25.1-cp39-cp39-win32.whl", hash = "sha256:247d3ffdd7775bdf191f848be8d49100495114c82c2bd134e8d5d075fb386a1c"}, - {file = "numpy-1.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:1d5d3c68e443c90b38fdf8ef40e60e2538a27548b39b12b73132456847f4b631"}, - {file = "numpy-1.25.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:35a9527c977b924042170a0887de727cd84ff179e478481404c5dc66b4170009"}, - {file = "numpy-1.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d3fe3dd0506a28493d82dc3cf254be8cd0d26f4008a417385cbf1ae95b54004"}, - {file = "numpy-1.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:012097b5b0d00a11070e8f2e261128c44157a8689f7dedcf35576e525893f4fe"}, - {file = "numpy-1.25.1.tar.gz", hash = "sha256:9a3a9f3a61480cc086117b426a8bd86869c213fc4072e606f01c4e4b66eb92bf"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = 
"numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] [[package]] name = "openai" 
version = "0.27.8" description = "Python client library for the OpenAI API" +category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -927,7 +995,7 @@ tqdm = "*" [package.extras] datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] +dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] @@ -935,6 +1003,7 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1 name = "openapi-schema-pydantic" version = "1.2.4" description = "OpenAPI (v3) specification schema as pydantic class" +category = "main" optional = false python-versions = ">=3.6.1" files = [ @@ -949,6 +1018,7 @@ pydantic = ">=1.8.2" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -956,46 +1026,32 @@ files = [ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] -[[package]] -name = "promptlayer" -version = "0.1.92" -description = "PromptLayer is a package to keep track of your GPT models training" -optional = false -python-versions = "*" -files = [ - {file = "promptlayer-0.1.92.tar.gz", hash = "sha256:81e321933b99bf8d3e52813f4d7ee047d0c8cc29956ac59fb6f4a7e6e18a9b83"}, -] - -[package.dependencies] -langchain = "*" -requests = "*" - [[package]] name = "psycopg2" -version = "2.9.6" +version = "2.9.7" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "psycopg2-2.9.6-cp310-cp310-win32.whl", hash = "sha256:f7a7a5ee78ba7dc74265ba69e010ae89dae635eea0e97b055fb641a01a31d2b1"}, - {file = "psycopg2-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:f75001a1cbbe523e00b0ef896a5a1ada2da93ccd752b7636db5a99bc57c44494"}, - {file = "psycopg2-2.9.6-cp311-cp311-win32.whl", hash = "sha256:53f4ad0a3988f983e9b49a5d9765d663bbe84f508ed655affdb810af9d0972ad"}, - {file = "psycopg2-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b81fcb9ecfc584f661b71c889edeae70bae30d3ef74fa0ca388ecda50b1222b7"}, - {file = "psycopg2-2.9.6-cp36-cp36m-win32.whl", hash = "sha256:11aca705ec888e4f4cea97289a0bf0f22a067a32614f6ef64fcf7b8bfbc53744"}, - {file = "psycopg2-2.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:36c941a767341d11549c0fbdbb2bf5be2eda4caf87f65dfcd7d146828bd27f39"}, - {file = "psycopg2-2.9.6-cp37-cp37m-win32.whl", hash = "sha256:869776630c04f335d4124f120b7fb377fe44b0a7645ab3c34b4ba42516951889"}, - {file = "psycopg2-2.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:a8ad4a47f42aa6aec8d061fdae21eaed8d864d4bb0f0cade5ad32ca16fcd6258"}, - {file = "psycopg2-2.9.6-cp38-cp38-win32.whl", hash = "sha256:2362ee4d07ac85ff0ad93e22c693d0f37ff63e28f0615a16b6635a645f4b9214"}, - {file = "psycopg2-2.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:d24ead3716a7d093b90b27b3d73459fe8cd90fd7065cf43b3c40966221d8c394"}, - {file = "psycopg2-2.9.6-cp39-cp39-win32.whl", hash = "sha256:1861a53a6a0fd248e42ea37c957d36950da00266378746588eab4f4b5649e95f"}, - {file = "psycopg2-2.9.6-cp39-cp39-win_amd64.whl", hash = 
"sha256:ded2faa2e6dfb430af7713d87ab4abbfc764d8d7fb73eafe96a24155f906ebf5"}, - {file = "psycopg2-2.9.6.tar.gz", hash = "sha256:f15158418fd826831b28585e2ab48ed8df2d0d98f502a2b4fe619e7d5ca29011"}, + {file = "psycopg2-2.9.7-cp310-cp310-win32.whl", hash = "sha256:1a6a2d609bce44f78af4556bea0c62a5e7f05c23e5ea9c599e07678995609084"}, + {file = "psycopg2-2.9.7-cp310-cp310-win_amd64.whl", hash = "sha256:b22ed9c66da2589a664e0f1ca2465c29b75aaab36fa209d4fb916025fb9119e5"}, + {file = "psycopg2-2.9.7-cp311-cp311-win32.whl", hash = "sha256:44d93a0109dfdf22fe399b419bcd7fa589d86895d3931b01fb321d74dadc68f1"}, + {file = "psycopg2-2.9.7-cp311-cp311-win_amd64.whl", hash = "sha256:91e81a8333a0037babfc9fe6d11e997a9d4dac0f38c43074886b0d9dead94fe9"}, + {file = "psycopg2-2.9.7-cp37-cp37m-win32.whl", hash = "sha256:d1210fcf99aae6f728812d1d2240afc1dc44b9e6cba526a06fb8134f969957c2"}, + {file = "psycopg2-2.9.7-cp37-cp37m-win_amd64.whl", hash = "sha256:e9b04cbef584310a1ac0f0d55bb623ca3244c87c51187645432e342de9ae81a8"}, + {file = "psycopg2-2.9.7-cp38-cp38-win32.whl", hash = "sha256:d5c5297e2fbc8068d4255f1e606bfc9291f06f91ec31b2a0d4c536210ac5c0a2"}, + {file = "psycopg2-2.9.7-cp38-cp38-win_amd64.whl", hash = "sha256:8275abf628c6dc7ec834ea63f6f3846bf33518907a2b9b693d41fd063767a866"}, + {file = "psycopg2-2.9.7-cp39-cp39-win32.whl", hash = "sha256:c7949770cafbd2f12cecc97dea410c514368908a103acf519f2a346134caa4d5"}, + {file = "psycopg2-2.9.7-cp39-cp39-win_amd64.whl", hash = "sha256:b6bd7d9d3a7a63faae6edf365f0ed0e9b0a1aaf1da3ca146e6b043fb3eb5d723"}, + {file = "psycopg2-2.9.7.tar.gz", hash = "sha256:f00cc35bd7119f1fed17b85bd1007855194dde2cbd8de01ab8ebb17487440ad8"}, ] [[package]] name = "pydantic" version = "1.10.12" description = "Data validation and settings management using python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1048,6 +1104,7 @@ email = ["email-validator (>=1.0.3)"] name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1062,6 +1119,7 @@ cli = ["click (>=5.0)"] name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1109,105 +1167,127 @@ files = [ [[package]] name = "regex" -version = "2023.6.3" +version = "2023.8.8" description = "Alternative regular expression module, to replace re." 
+category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "regex-2023.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:824bf3ac11001849aec3fa1d69abcb67aac3e150a933963fb12bda5151fe1bfd"}, - {file = "regex-2023.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:05ed27acdf4465c95826962528f9e8d41dbf9b1aa8531a387dee6ed215a3e9ef"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b49c764f88a79160fa64f9a7b425620e87c9f46095ef9c9920542ab2495c8bc"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e3f1316c2293e5469f8f09dc2d76efb6c3982d3da91ba95061a7e69489a14ef"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43e1dd9d12df9004246bacb79a0e5886b3b6071b32e41f83b0acbf293f820ee8"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4959e8bcbfda5146477d21c3a8ad81b185cd252f3d0d6e4724a5ef11c012fb06"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:af4dd387354dc83a3bff67127a124c21116feb0d2ef536805c454721c5d7993d"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2239d95d8e243658b8dbb36b12bd10c33ad6e6933a54d36ff053713f129aa536"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:890e5a11c97cf0d0c550eb661b937a1e45431ffa79803b942a057c4fb12a2da2"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a8105e9af3b029f243ab11ad47c19b566482c150c754e4c717900a798806b222"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:25be746a8ec7bc7b082783216de8e9473803706723b3f6bef34b3d0ed03d57e2"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:3676f1dd082be28b1266c93f618ee07741b704ab7b68501a173ce7d8d0d0ca18"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:10cb847aeb1728412c666ab2e2000ba6f174f25b2bdc7292e7dd71b16db07568"}, - {file = "regex-2023.6.3-cp310-cp310-win32.whl", hash = "sha256:dbbbfce33cd98f97f6bffb17801b0576e653f4fdb1d399b2ea89638bc8d08ae1"}, - {file = "regex-2023.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:c5f8037000eb21e4823aa485149f2299eb589f8d1fe4b448036d230c3f4e68e0"}, - {file = "regex-2023.6.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c123f662be8ec5ab4ea72ea300359023a5d1df095b7ead76fedcd8babbedf969"}, - {file = "regex-2023.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9edcbad1f8a407e450fbac88d89e04e0b99a08473f666a3f3de0fd292badb6aa"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcba6dae7de533c876255317c11f3abe4907ba7d9aa15d13e3d9710d4315ec0e"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29cdd471ebf9e0f2fb3cac165efedc3c58db841d83a518b082077e612d3ee5df"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12b74fbbf6cbbf9dbce20eb9b5879469e97aeeaa874145517563cca4029db65c"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c29ca1bd61b16b67be247be87390ef1d1ef702800f91fbd1991f5c4421ebae8"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:d77f09bc4b55d4bf7cc5eba785d87001d6757b7c9eec237fe2af57aba1a071d9"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ea353ecb6ab5f7e7d2f4372b1e779796ebd7b37352d290096978fea83c4dba0c"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:10590510780b7541969287512d1b43f19f965c2ece6c9b1c00fc367b29d8dce7"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e2fbd6236aae3b7f9d514312cdb58e6494ee1c76a9948adde6eba33eb1c4264f"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:6b2675068c8b56f6bfd5a2bda55b8accbb96c02fd563704732fd1c95e2083461"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74419d2b50ecb98360cfaa2974da8689cb3b45b9deff0dcf489c0d333bcc1477"}, - {file = "regex-2023.6.3-cp311-cp311-win32.whl", hash = "sha256:fb5ec16523dc573a4b277663a2b5a364e2099902d3944c9419a40ebd56a118f9"}, - {file = "regex-2023.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:09e4a1a6acc39294a36b7338819b10baceb227f7f7dbbea0506d419b5a1dd8af"}, - {file = "regex-2023.6.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0654bca0cdf28a5956c83839162692725159f4cda8d63e0911a2c0dc76166525"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:463b6a3ceb5ca952e66550a4532cef94c9a0c80dc156c4cc343041951aec1697"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87b2a5bb5e78ee0ad1de71c664d6eb536dc3947a46a69182a90f4410f5e3f7dd"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6343c6928282c1f6a9db41f5fd551662310e8774c0e5ebccb767002fcf663ca9"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6192d5af2ccd2a38877bfef086d35e6659566a335b1492786ff254c168b1693"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74390d18c75054947e4194019077e243c06fbb62e541d8817a0fa822ea310c14"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:742e19a90d9bb2f4a6cf2862b8b06dea5e09b96c9f2df1779e53432d7275331f"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8abbc5d54ea0ee80e37fef009e3cec5dafd722ed3c829126253d3e22f3846f1e"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:c2b867c17a7a7ae44c43ebbeb1b5ff406b3e8d5b3e14662683e5e66e6cc868d3"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:d831c2f8ff278179705ca59f7e8524069c1a989e716a1874d6d1aab6119d91d1"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:ee2d1a9a253b1729bb2de27d41f696ae893507c7db224436abe83ee25356f5c1"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:61474f0b41fe1a80e8dfa70f70ea1e047387b7cd01c85ec88fa44f5d7561d787"}, - {file = "regex-2023.6.3-cp36-cp36m-win32.whl", hash = "sha256:0b71e63226e393b534105fcbdd8740410dc6b0854c2bfa39bbda6b0d40e59a54"}, - {file = "regex-2023.6.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bbb02fd4462f37060122e5acacec78e49c0fbb303c30dd49c7f493cf21fc5b27"}, - {file = "regex-2023.6.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b862c2b9d5ae38a68b92e215b93f98d4c5e9454fa36aae4450f61dd33ff48487"}, - {file = 
"regex-2023.6.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:976d7a304b59ede34ca2921305b57356694f9e6879db323fd90a80f865d355a3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:83320a09188e0e6c39088355d423aa9d056ad57a0b6c6381b300ec1a04ec3d16"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9427a399501818a7564f8c90eced1e9e20709ece36be701f394ada99890ea4b3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7178bbc1b2ec40eaca599d13c092079bf529679bf0371c602edaa555e10b41c3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:837328d14cde912af625d5f303ec29f7e28cdab588674897baafaf505341f2fc"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d44dc13229905ae96dd2ae2dd7cebf824ee92bc52e8cf03dcead37d926da019"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d54af539295392611e7efbe94e827311eb8b29668e2b3f4cadcfe6f46df9c777"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7117d10690c38a622e54c432dfbbd3cbd92f09401d622902c32f6d377e2300ee"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bb60b503ec8a6e4e3e03a681072fa3a5adcbfa5479fa2d898ae2b4a8e24c4591"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:65ba8603753cec91c71de423a943ba506363b0e5c3fdb913ef8f9caa14b2c7e0"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:271f0bdba3c70b58e6f500b205d10a36fb4b58bd06ac61381b68de66442efddb"}, - {file = "regex-2023.6.3-cp37-cp37m-win32.whl", hash = "sha256:9beb322958aaca059f34975b0df135181f2e5d7a13b84d3e0e45434749cb20f7"}, - {file = "regex-2023.6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fea75c3710d4f31389eed3c02f62d0b66a9da282521075061ce875eb5300cf23"}, - {file = "regex-2023.6.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f56fcb7ff7bf7404becdfc60b1e81a6d0561807051fd2f1860b0d0348156a07"}, - {file = "regex-2023.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d2da3abc88711bce7557412310dfa50327d5769a31d1c894b58eb256459dc289"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a99b50300df5add73d307cf66abea093304a07eb017bce94f01e795090dea87c"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5708089ed5b40a7b2dc561e0c8baa9535b77771b64a8330b684823cfd5116036"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:687ea9d78a4b1cf82f8479cab23678aff723108df3edeac098e5b2498879f4a7"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3850beab9f527f06ccc94b446c864059c57651b3f911fddb8d9d3ec1d1b25d"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8915cc96abeb8983cea1df3c939e3c6e1ac778340c17732eb63bb96247b91d2"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:841d6e0e5663d4c7b4c8099c9997be748677d46cbf43f9f471150e560791f7ff"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:9edce5281f965cf135e19840f4d93d55b3835122aa76ccacfd389e880ba4cf82"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b956231ebdc45f5b7a2e1f90f66a12be9610ce775fe1b1d50414aac1e9206c06"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:36efeba71c6539d23c4643be88295ce8c82c88bbd7c65e8a24081d2ca123da3f"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:cf67ca618b4fd34aee78740bea954d7c69fdda419eb208c2c0c7060bb822d747"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b4598b1897837067a57b08147a68ac026c1e73b31ef6e36deeeb1fa60b2933c9"}, - {file = "regex-2023.6.3-cp38-cp38-win32.whl", hash = "sha256:f415f802fbcafed5dcc694c13b1292f07fe0befdb94aa8a52905bd115ff41e88"}, - {file = "regex-2023.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:d4f03bb71d482f979bda92e1427f3ec9b220e62a7dd337af0aa6b47bf4498f72"}, - {file = "regex-2023.6.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccf91346b7bd20c790310c4147eee6ed495a54ddb6737162a36ce9dbef3e4751"}, - {file = "regex-2023.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b28f5024a3a041009eb4c333863d7894d191215b39576535c6734cd88b0fcb68"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0bb18053dfcfed432cc3ac632b5e5e5c5b7e55fb3f8090e867bfd9b054dbcbf"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5bfb3004f2144a084a16ce19ca56b8ac46e6fd0651f54269fc9e230edb5e4a"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c6b48d0fa50d8f4df3daf451be7f9689c2bde1a52b1225c5926e3f54b6a9ed1"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:051da80e6eeb6e239e394ae60704d2b566aa6a7aed6f2890a7967307267a5dc6"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4c3b7fa4cdaa69268748665a1a6ff70c014d39bb69c50fda64b396c9116cf77"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:457b6cce21bee41ac292d6753d5e94dcbc5c9e3e3a834da285b0bde7aa4a11e9"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:aad51907d74fc183033ad796dd4c2e080d1adcc4fd3c0fd4fd499f30c03011cd"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0385e73da22363778ef2324950e08b689abdf0b108a7d8decb403ad7f5191938"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c6a57b742133830eec44d9b2290daf5cbe0a2f1d6acee1b3c7b1c7b2f3606df7"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3e5219bf9e75993d73ab3d25985c857c77e614525fac9ae02b1bebd92f7cecac"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e5087a3c59eef624a4591ef9eaa6e9a8d8a94c779dade95d27c0bc24650261cd"}, - {file = "regex-2023.6.3-cp39-cp39-win32.whl", hash = "sha256:20326216cc2afe69b6e98528160b225d72f85ab080cbdf0b11528cbbaba2248f"}, - {file = "regex-2023.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:bdff5eab10e59cf26bc479f565e25ed71a7d041d1ded04ccf9aee1d9f208487a"}, - {file = "regex-2023.6.3.tar.gz", hash = "sha256:72d1a25bf36d2050ceb35b517afe13864865268dfb45910e2e17a84be6cbfeb0"}, + {file = "regex-2023.8.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:88900f521c645f784260a8d346e12a1590f79e96403971241e64c3a265c8ecdb"}, + 
{file = "regex-2023.8.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3611576aff55918af2697410ff0293d6071b7e00f4b09e005d614686ac4cd57c"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0ccc8f2698f120e9e5742f4b38dc944c38744d4bdfc427616f3a163dd9de5"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c662a4cbdd6280ee56f841f14620787215a171c4e2d1744c9528bed8f5816c96"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf0633e4a1b667bfe0bb10b5e53fe0d5f34a6243ea2530eb342491f1adf4f739"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551ad543fa19e94943c5b2cebc54c73353ffff08228ee5f3376bd27b3d5b9800"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54de2619f5ea58474f2ac211ceea6b615af2d7e4306220d4f3fe690c91988a61"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ec4b3f0aebbbe2fc0134ee30a791af522a92ad9f164858805a77442d7d18570"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ae646c35cb9f820491760ac62c25b6d6b496757fda2d51be429e0e7b67ae0ab"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca339088839582d01654e6f83a637a4b8194d0960477b9769d2ff2cfa0fa36d2"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:d9b6627408021452dcd0d2cdf8da0534e19d93d070bfa8b6b4176f99711e7f90"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:bd3366aceedf274f765a3a4bc95d6cd97b130d1dda524d8f25225d14123c01db"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7aed90a72fc3654fba9bc4b7f851571dcc368120432ad68b226bd593f3f6c0b7"}, + {file = "regex-2023.8.8-cp310-cp310-win32.whl", hash = "sha256:80b80b889cb767cc47f31d2b2f3dec2db8126fbcd0cff31b3925b4dc6609dcdb"}, + {file = "regex-2023.8.8-cp310-cp310-win_amd64.whl", hash = "sha256:b82edc98d107cbc7357da7a5a695901b47d6eb0420e587256ba3ad24b80b7d0b"}, + {file = "regex-2023.8.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1e7d84d64c84ad97bf06f3c8cb5e48941f135ace28f450d86af6b6512f1c9a71"}, + {file = "regex-2023.8.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce0f9fbe7d295f9922c0424a3637b88c6c472b75eafeaff6f910494a1fa719ef"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06c57e14ac723b04458df5956cfb7e2d9caa6e9d353c0b4c7d5d54fcb1325c46"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7a9aaa5a1267125eef22cef3b63484c3241aaec6f48949b366d26c7250e0357"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b7408511fca48a82a119d78a77c2f5eb1b22fe88b0d2450ed0756d194fe7a9a"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14dc6f2d88192a67d708341f3085df6a4f5a0c7b03dec08d763ca2cd86e9f559"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48c640b99213643d141550326f34f0502fedb1798adb3c9eb79650b1ecb2f177"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:0085da0f6c6393428bf0d9c08d8b1874d805bb55e17cb1dfa5ddb7cfb11140bf"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:964b16dcc10c79a4a2be9f1273fcc2684a9eedb3906439720598029a797b46e6"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7ce606c14bb195b0e5108544b540e2c5faed6843367e4ab3deb5c6aa5e681208"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:40f029d73b10fac448c73d6eb33d57b34607f40116e9f6e9f0d32e9229b147d7"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3b8e6ea6be6d64104d8e9afc34c151926f8182f84e7ac290a93925c0db004bfd"}, + {file = "regex-2023.8.8-cp311-cp311-win32.whl", hash = "sha256:942f8b1f3b223638b02df7df79140646c03938d488fbfb771824f3d05fc083a8"}, + {file = "regex-2023.8.8-cp311-cp311-win_amd64.whl", hash = "sha256:51d8ea2a3a1a8fe4f67de21b8b93757005213e8ac3917567872f2865185fa7fb"}, + {file = "regex-2023.8.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e951d1a8e9963ea51efd7f150450803e3b95db5939f994ad3d5edac2b6f6e2b4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704f63b774218207b8ccc6c47fcef5340741e5d839d11d606f70af93ee78e4d4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22283c769a7b01c8ac355d5be0715bf6929b6267619505e289f792b01304d898"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91129ff1bb0619bc1f4ad19485718cc623a2dc433dff95baadbf89405c7f6b57"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de35342190deb7b866ad6ba5cbcccb2d22c0487ee0cbb251efef0843d705f0d4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b993b6f524d1e274a5062488a43e3f9f8764ee9745ccd8e8193df743dbe5ee61"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3026cbcf11d79095a32d9a13bbc572a458727bd5b1ca332df4a79faecd45281c"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:293352710172239bf579c90a9864d0df57340b6fd21272345222fb6371bf82b3"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d909b5a3fff619dc7e48b6b1bedc2f30ec43033ba7af32f936c10839e81b9217"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3d370ff652323c5307d9c8e4c62efd1956fb08051b0e9210212bc51168b4ff56"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:b076da1ed19dc37788f6a934c60adf97bd02c7eea461b73730513921a85d4235"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e9941a4ada58f6218694f382e43fdd256e97615db9da135e77359da257a7168b"}, + {file = "regex-2023.8.8-cp36-cp36m-win32.whl", hash = "sha256:a8c65c17aed7e15a0c824cdc63a6b104dfc530f6fa8cb6ac51c437af52b481c7"}, + {file = "regex-2023.8.8-cp36-cp36m-win_amd64.whl", hash = "sha256:aadf28046e77a72f30dcc1ab185639e8de7f4104b8cb5c6dfa5d8ed860e57236"}, + {file = "regex-2023.8.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:423adfa872b4908843ac3e7a30f957f5d5282944b81ca0a3b8a7ccbbfaa06103"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ae594c66f4a7e1ea67232a0846649a7c94c188d6c071ac0210c3e86a5f92109"}, + {file = 
"regex-2023.8.8-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e51c80c168074faa793685656c38eb7a06cbad7774c8cbc3ea05552d615393d8"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09b7f4c66aa9d1522b06e31a54f15581c37286237208df1345108fcf4e050c18"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e73e5243af12d9cd6a9d6a45a43570dbe2e5b1cdfc862f5ae2b031e44dd95a8"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:941460db8fe3bd613db52f05259c9336f5a47ccae7d7def44cc277184030a116"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f0ccf3e01afeb412a1a9993049cb160d0352dba635bbca7762b2dc722aa5742a"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e9216e0d2cdce7dbc9be48cb3eacb962740a09b011a116fd7af8c832ab116ca"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:5cd9cd7170459b9223c5e592ac036e0704bee765706445c353d96f2890e816c8"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4873ef92e03a4309b3ccd8281454801b291b689f6ad45ef8c3658b6fa761d7ac"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:239c3c2a339d3b3ddd51c2daef10874410917cd2b998f043c13e2084cb191684"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1005c60ed7037be0d9dea1f9c53cc42f836188227366370867222bda4c3c6bd7"}, + {file = "regex-2023.8.8-cp37-cp37m-win32.whl", hash = "sha256:e6bd1e9b95bc5614a7a9c9c44fde9539cba1c823b43a9f7bc11266446dd568e3"}, + {file = "regex-2023.8.8-cp37-cp37m-win_amd64.whl", hash = "sha256:9a96edd79661e93327cfeac4edec72a4046e14550a1d22aa0dd2e3ca52aec921"}, + {file = "regex-2023.8.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f2181c20ef18747d5f4a7ea513e09ea03bdd50884a11ce46066bb90fe4213675"}, + {file = "regex-2023.8.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2ad5add903eb7cdde2b7c64aaca405f3957ab34f16594d2b78d53b8b1a6a7d6"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9233ac249b354c54146e392e8a451e465dd2d967fc773690811d3a8c240ac601"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920974009fb37b20d32afcdf0227a2e707eb83fe418713f7a8b7de038b870d0b"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2b6c5dfe0929b6c23dde9624483380b170b6e34ed79054ad131b20203a1a63"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96979d753b1dc3b2169003e1854dc67bfc86edf93c01e84757927f810b8c3c93"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ae54a338191e1356253e7883d9d19f8679b6143703086245fb14d1f20196be9"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2162ae2eb8b079622176a81b65d486ba50b888271302190870b8cc488587d280"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c884d1a59e69e03b93cf0dfee8794c63d7de0ee8f7ffb76e5f75be8131b6400a"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:cf9273e96f3ee2ac89ffcb17627a78f78e7516b08f94dc435844ae72576a276e"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:83215147121e15d5f3a45d99abeed9cf1fe16869d5c233b08c56cdf75f43a504"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3f7454aa427b8ab9101f3787eb178057c5250478e39b99540cfc2b889c7d0586"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0640913d2c1044d97e30d7c41728195fc37e54d190c5385eacb52115127b882"}, + {file = "regex-2023.8.8-cp38-cp38-win32.whl", hash = "sha256:0c59122ceccb905a941fb23b087b8eafc5290bf983ebcb14d2301febcbe199c7"}, + {file = "regex-2023.8.8-cp38-cp38-win_amd64.whl", hash = "sha256:c12f6f67495ea05c3d542d119d270007090bad5b843f642d418eb601ec0fa7be"}, + {file = "regex-2023.8.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:82cd0a69cd28f6cc3789cc6adeb1027f79526b1ab50b1f6062bbc3a0ccb2dbc3"}, + {file = "regex-2023.8.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bb34d1605f96a245fc39790a117ac1bac8de84ab7691637b26ab2c5efb8f228c"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:987b9ac04d0b38ef4f89fbc035e84a7efad9cdd5f1e29024f9289182c8d99e09"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dd6082f4e2aec9b6a0927202c85bc1b09dcab113f97265127c1dc20e2e32495"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7eb95fe8222932c10d4436e7a6f7c99991e3fdd9f36c949eff16a69246dee2dc"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7098c524ba9f20717a56a8d551d2ed491ea89cbf37e540759ed3b776a4f8d6eb"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b694430b3f00eb02c594ff5a16db30e054c1b9589a043fe9174584c6efa8033"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2aeab3895d778155054abea5238d0eb9a72e9242bd4b43f42fd911ef9a13470"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:988631b9d78b546e284478c2ec15c8a85960e262e247b35ca5eaf7ee22f6050a"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:67ecd894e56a0c6108ec5ab1d8fa8418ec0cff45844a855966b875d1039a2e34"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:14898830f0a0eb67cae2bbbc787c1a7d6e34ecc06fbd39d3af5fe29a4468e2c9"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f2200e00b62568cfd920127782c61bc1c546062a879cdc741cfcc6976668dfcf"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9691a549c19c22d26a4f3b948071e93517bdf86e41b81d8c6ac8a964bb71e5a6"}, + {file = "regex-2023.8.8-cp39-cp39-win32.whl", hash = "sha256:6ab2ed84bf0137927846b37e882745a827458689eb969028af8032b1b3dac78e"}, + {file = "regex-2023.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5543c055d8ec7801901e1193a51570643d6a6ab8751b1f7dd9af71af467538bb"}, + {file = "regex-2023.8.8.tar.gz", hash = "sha256:fcbdc5f2b0f1cd0f6a56cdb46fe41d2cce1e644e3b68832f3eeebc5fb0f7712e"}, ] +[[package]] +name = "replicate" +version = "0.11.0" +description = "Python client for Replicate" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "replicate-0.11.0-py3-none-any.whl", hash = 
"sha256:fbb8815068864dc822cd4fa7b6103d6f4089d6ef122abd6c3441ca0f0f110c46"}, + {file = "replicate-0.11.0.tar.gz", hash = "sha256:4d54b5838c1552a6f76cc37c3af8d9a7998105382082d672acad31636ad443b5"}, +] + +[package.dependencies] +packaging = "*" +pydantic = ">1" +requests = ">2" + +[package.extras] +dev = ["black", "mypy", "pytest", "responses", "ruff"] + [[package]] name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1229,6 +1309,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1240,6 +1321,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1249,61 +1331,62 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.19" +version = "2.0.20" description = "Database Abstraction Library" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9deaae357edc2091a9ed5d25e9ee8bba98bcfae454b3911adeaf159c2e9ca9e3"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0bf0fd65b50a330261ec7fe3d091dfc1c577483c96a9fa1e4323e932961aa1b5"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d90ccc15ba1baa345796a8fb1965223ca7ded2d235ccbef80a47b85cea2d71a"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb4e688f6784427e5f9479d1a13617f573de8f7d4aa713ba82813bcd16e259d1"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:584f66e5e1979a7a00f4935015840be627e31ca29ad13f49a6e51e97a3fb8cae"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c69ce70047b801d2aba3e5ff3cba32014558966109fecab0c39d16c18510f15"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-win32.whl", hash = "sha256:96f0463573469579d32ad0c91929548d78314ef95c210a8115346271beeeaaa2"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-win_amd64.whl", hash = "sha256:22bafb1da60c24514c141a7ff852b52f9f573fb933b1e6b5263f0daa28ce6db9"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d6894708eeb81f6d8193e996257223b6bb4041cb05a17cd5cf373ed836ef87a2"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8f2afd1aafded7362b397581772c670f20ea84d0a780b93a1a1529da7c3d369"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15afbf5aa76f2241184c1d3b61af1a72ba31ce4161013d7cb5c4c2fca04fd6e"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fc05b59142445a4efb9c1fd75c334b431d35c304b0e33f4fa0ff1ea4890f92e"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5831138f0cc06b43edf5f99541c64adf0ab0d41f9a4471fd63b54ae18399e4de"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3afa8a21a9046917b3a12ffe016ba7ebe7a55a6fc0c7d950beb303c735c3c3ad"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-win32.whl", hash = "sha256:c896d4e6ab2eba2afa1d56be3d0b936c56d4666e789bfc59d6ae76e9fcf46145"}, - {file = 
"SQLAlchemy-2.0.19-cp311-cp311-win_amd64.whl", hash = "sha256:024d2f67fb3ec697555e48caeb7147cfe2c08065a4f1a52d93c3d44fc8e6ad1c"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:89bc2b374ebee1a02fd2eae6fd0570b5ad897ee514e0f84c5c137c942772aa0c"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd4d410a76c3762511ae075d50f379ae09551d92525aa5bb307f8343bf7c2c12"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f469f15068cd8351826df4080ffe4cc6377c5bf7d29b5a07b0e717dddb4c7ea2"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:cda283700c984e699e8ef0fcc5c61f00c9d14b6f65a4f2767c97242513fcdd84"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:43699eb3f80920cc39a380c159ae21c8a8924fe071bccb68fc509e099420b148"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-win32.whl", hash = "sha256:61ada5831db36d897e28eb95f0f81814525e0d7927fb51145526c4e63174920b"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-win_amd64.whl", hash = "sha256:57d100a421d9ab4874f51285c059003292433c648df6abe6c9c904e5bd5b0828"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:16a310f5bc75a5b2ce7cb656d0e76eb13440b8354f927ff15cbaddd2523ee2d1"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cf7b5e3856cbf1876da4e9d9715546fa26b6e0ba1a682d5ed2fc3ca4c7c3ec5b"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e7b69d9ced4b53310a87117824b23c509c6fc1f692aa7272d47561347e133b6"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9eb4575bfa5afc4b066528302bf12083da3175f71b64a43a7c0badda2be365"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6b54d1ad7a162857bb7c8ef689049c7cd9eae2f38864fc096d62ae10bc100c7d"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5d6afc41ca0ecf373366fd8e10aee2797128d3ae45eb8467b19da4899bcd1ee0"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-win32.whl", hash = "sha256:430614f18443b58ceb9dedec323ecddc0abb2b34e79d03503b5a7579cd73a531"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-win_amd64.whl", hash = "sha256:eb60699de43ba1a1f77363f563bb2c652f7748127ba3a774f7cf2c7804aa0d3d"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a752b7a9aceb0ba173955d4f780c64ee15a1a991f1c52d307d6215c6c73b3a4c"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7351c05db355da112e056a7b731253cbeffab9dfdb3be1e895368513c7d70106"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa51ce4aea583b0c6b426f4b0563d3535c1c75986c4373a0987d84d22376585b"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae7473a67cd82a41decfea58c0eac581209a0aa30f8bc9190926fbf628bb17f7"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:851a37898a8a39783aab603c7348eb5b20d83c76a14766a43f56e6ad422d1ec8"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:539010665c90e60c4a1650afe4ab49ca100c74e6aef882466f1de6471d414be7"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-win32.whl", hash = "sha256:f82c310ddf97b04e1392c33cf9a70909e0ae10a7e2ddc1d64495e3abdc5d19fb"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-win_amd64.whl", hash = 
"sha256:8e712cfd2e07b801bc6b60fdf64853bc2bd0af33ca8fa46166a23fe11ce0dbb0"}, - {file = "SQLAlchemy-2.0.19-py3-none-any.whl", hash = "sha256:314145c1389b021a9ad5aa3a18bac6f5d939f9087d7fc5443be28cba19d2c972"}, - {file = "SQLAlchemy-2.0.19.tar.gz", hash = "sha256:77a14fa20264af73ddcdb1e2b9c5a829b8cc6b8304d0f093271980e36c200a3f"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759b51346aa388c2e606ee206c0bc6f15a5299f6174d1e10cadbe4530d3c7a98"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1506e988ebeaaf316f183da601f24eedd7452e163010ea63dbe52dc91c7fc70e"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5768c268df78bacbde166b48be788b83dddaa2a5974b8810af422ddfe68a9bc8"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f0dd6d15b6dc8b28a838a5c48ced7455c3e1fb47b89da9c79cc2090b072a50"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:243d0fb261f80a26774829bc2cee71df3222587ac789b7eaf6555c5b15651eed"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6eb6d77c31e1bf4268b4d61b549c341cbff9842f8e115ba6904249c20cb78a61"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-win32.whl", hash = "sha256:bcb04441f370cbe6e37c2b8d79e4af9e4789f626c595899d94abebe8b38f9a4d"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-win_amd64.whl", hash = "sha256:d32b5ffef6c5bcb452723a496bad2d4c52b346240c59b3e6dba279f6dcc06c14"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dd81466bdbc82b060c3c110b2937ab65ace41dfa7b18681fdfad2f37f27acdd7"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fe7d61dc71119e21ddb0094ee994418c12f68c61b3d263ebaae50ea8399c4d4"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4e571af672e1bb710b3cc1a9794b55bce1eae5aed41a608c0401885e3491179"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3364b7066b3c7f4437dd345d47271f1251e0cfb0aba67e785343cdbdb0fff08c"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1be86ccea0c965a1e8cd6ccf6884b924c319fcc85765f16c69f1ae7148eba64b"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1d35d49a972649b5080557c603110620a86aa11db350d7a7cb0f0a3f611948a0"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-win32.whl", hash = "sha256:27d554ef5d12501898d88d255c54eef8414576f34672e02fe96d75908993cf53"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-win_amd64.whl", hash = "sha256:411e7f140200c02c4b953b3dbd08351c9f9818d2bd591b56d0fa0716bd014f1e"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3c6aceebbc47db04f2d779db03afeaa2c73ea3f8dcd3987eb9efdb987ffa09a3"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d3f175410a6db0ad96b10bfbb0a5530ecd4fcf1e2b5d83d968dd64791f810ed"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea8186be85da6587456c9ddc7bf480ebad1a0e6dcbad3967c4821233a4d4df57"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c3d99ba99007dab8233f635c32b5cd24fb1df8d64e17bc7df136cedbea427897"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:76fdfc0f6f5341987474ff48e7a66c3cd2b8a71ddda01fa82fedb180b961630a"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-win32.whl", hash = "sha256:d3793dcf5bc4d74ae1e9db15121250c2da476e1af8e45a1d9a52b1513a393459"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-win_amd64.whl", hash = "sha256:79fde625a0a55220d3624e64101ed68a059c1c1f126c74f08a42097a72ff66a9"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:599ccd23a7146e126be1c7632d1d47847fa9f333104d03325c4e15440fc7d927"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1a58052b5a93425f656675673ef1f7e005a3b72e3f2c91b8acca1b27ccadf5f4"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79543f945be7a5ada9943d555cf9b1531cfea49241809dd1183701f94a748624"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63e73da7fb030ae0a46a9ffbeef7e892f5def4baf8064786d040d45c1d6d1dc5"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3ce5e81b800a8afc870bb8e0a275d81957e16f8c4b62415a7b386f29a0cb9763"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb0d3e94c2a84215532d9bcf10229476ffd3b08f481c53754113b794afb62d14"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-win32.whl", hash = "sha256:8dd77fd6648b677d7742d2c3cc105a66e2681cc5e5fb247b88c7a7b78351cf74"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-win_amd64.whl", hash = "sha256:6f8a934f9dfdf762c844e5164046a9cea25fabbc9ec865c023fe7f300f11ca4a"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:26a3399eaf65e9ab2690c07bd5cf898b639e76903e0abad096cd609233ce5208"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4cde2e1096cbb3e62002efdb7050113aa5f01718035ba9f29f9d89c3758e7e4e"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b09ba72e4e6d341bb5bdd3564f1cea6095d4c3632e45dc69375a1dbe4e26ec"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b74eeafaa11372627ce94e4dc88a6751b2b4d263015b3523e2b1e57291102f0"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:77d37c1b4e64c926fa3de23e8244b964aab92963d0f74d98cbc0783a9e04f501"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:eefebcc5c555803065128401a1e224a64607259b5eb907021bf9b175f315d2a6"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-win32.whl", hash = "sha256:3423dc2a3b94125094897118b52bdf4d37daf142cbcf26d48af284b763ab90e9"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-win_amd64.whl", hash = "sha256:5ed61e3463021763b853628aef8bc5d469fe12d95f82c74ef605049d810f3267"}, + {file = "SQLAlchemy-2.0.20-py3-none-any.whl", hash = "sha256:63a368231c53c93e2b67d0c5556a9836fdcd383f7e3026a39602aad775b14acf"}, + {file = "SQLAlchemy-2.0.20.tar.gz", hash = "sha256:ca8a5ff2aa7f3ade6c498aaafce25b1eaeabe4e42b73e25519183e4566a16fc6"}, ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or 
platform_machine == \"win32\" or platform_machine == \"WIN32\""} mypy = {version = ">=0.910", optional = true, markers = "extra == \"mypy\""} typing-extensions = ">=4.2.0" [package.extras] -aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] asyncio = ["greenlet (!=0.4.17)"] asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] @@ -1330,6 +1413,7 @@ sqlcipher = ["sqlcipher3-binary"] name = "starlette" version = "0.27.0" description = "The little ASGI library that shines." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1345,13 +1429,14 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam [[package]] name = "tenacity" -version = "8.2.2" +version = "8.2.3" description = "Retry code until it succeeds" +category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "tenacity-8.2.2-py3-none-any.whl", hash = "sha256:2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0"}, - {file = "tenacity-8.2.2.tar.gz", hash = "sha256:43af037822bd0029025877f3b2d97cc4d7bb0c2991000a3d59d71517c5c969e0"}, + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, ] [package.extras] @@ -1361,6 +1446,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "tiktoken" version = "0.4.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1406,6 +1492,7 @@ blobfile = ["blobfile (>=2)"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1415,20 +1502,21 @@ files = [ [[package]] name = "tqdm" -version = "4.65.0" +version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, - {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] @@ -1437,6 +1525,7 @@ telegram = ["requests"] name = "typing-extensions" version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1448,6 +1537,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
+category = "main" optional = false python-versions = "*" files = [ @@ -1463,6 +1553,7 @@ typing-extensions = ">=3.7.4" name = "urllib3" version = "2.0.4" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1478,18 +1569,20 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.22.0" +version = "0.23.2" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "uvicorn-0.22.0-py3-none-any.whl", hash = "sha256:e9434d3bbf05f310e762147f769c9f21235ee118ba2d2bf1155a7196448bd996"}, - {file = "uvicorn-0.22.0.tar.gz", hash = "sha256:79277ae03db57ce7d9aa0567830bbb51d7a612f54d6e1e3e92da3ef24c2c8ed8"}, + {file = "uvicorn-0.23.2-py3-none-any.whl", hash = "sha256:1f9be6558f01239d4fdf22ef8126c39cb1ad0addf76c40e760549d2c2f43ab53"}, + {file = "uvicorn-0.23.2.tar.gz", hash = "sha256:4d3cc12d7727ba72b64d12d3cc7743124074c0a69f7b201512fc50c3e3f1569a"}, ] [package.dependencies] click = ">=7.0" h11 = ">=0.8" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] @@ -1498,6 +1591,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "yarl" version = "1.9.2" description = "Yet another URL library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1584,4 +1678,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "856689b2b909f48940783c8da91c4774528e50a3fc0dc08e425f570827a83b09" +content-hash = "87a7b3b9660b3adad9c83f2095febfcb80d6bf6b75c30be1a3f29b1716da9aa2" diff --git a/module_programming_llm/pyproject.toml b/module_programming_llm/pyproject.toml index 43fec6321..425b47051 100644 --- a/module_programming_llm/pyproject.toml +++ b/module_programming_llm/pyproject.toml @@ -9,11 +9,12 @@ license = "MIT" python = "^3.10" athena = {path = "../athena", develop = true} openai = "^0.27.8" -langchain = "0.0.225" +langchain = "^0.0.267" +python-dotenv = "^1.0.0" +nltk = "^3.8.1" gitpython = "^3.1.31" +replicate = "^0.11.0" tiktoken = "^0.4.0" -promptlayer = "^0.1.85" -python-dotenv = "^1.0.0" [tool.poetry.scripts] module = "athena:run_module" From 5eb465d76e5c41b202f46a8f6099942c1a14d64d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 16:10:08 +0200 Subject: [PATCH 02/51] add some changes --- env_example/module_programming_llm.env | 35 +- module_programming_llm/.env.example | 34 +- .../module_programming_llm/__main__.py | 35 +- .../basic/basic_feedback_provider.py | 182 +++--- .../basic/file_instructions.py | 59 -- .../basic/prompts/basic_feedback_provider.py | 40 -- .../generate_file_grading_instructions.py | 15 - .../generate_file_problem_statements.py | 15 - .../module_programming_llm/config.py | 69 +++ .../generate_suggestions_by_file.py | 179 ++++++ .../helpers/llm_utils.py | 152 +++++ .../module_programming_llm/helpers/models.py | 36 -- .../helpers/models/__init__.py | 36 ++ .../helpers/models/model_config.py | 10 + .../helpers/models/openai.py | 321 +++++++++++ .../helpers/models/replicate.py | 110 ++++ .../module_programming_llm/helpers/utils.py | 2 +- .../prompts/generate_suggestions_by_file.py | 27 + .../split_grading_instructions_by_file.py | 13 + .../split_problem_statement_by_file.py | 13 + .../split_grading_instructions_by_file.py | 95 +++ .../split_problem_statement_by_file.py | 95 +++ module_programming_llm/poetry.lock | 542 +++++++++++++++++- module_programming_llm/pyproject.toml | 3 + 24 files changed, 1827 insertions(+), 291 deletions(-) delete mode 100644 module_programming_llm/module_programming_llm/basic/file_instructions.py delete mode 100644 module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py delete mode 100644 module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py delete mode 100644 module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py create mode 100644 module_programming_llm/module_programming_llm/config.py create mode 100644 module_programming_llm/module_programming_llm/generate_suggestions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/helpers/llm_utils.py delete mode 100644 module_programming_llm/module_programming_llm/helpers/models.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/__init__.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/model_config.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/openai.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/replicate.py create mode 100644 module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py create mode 100644 module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/split_problem_statement_by_file.py diff --git a/env_example/module_programming_llm.env b/env_example/module_programming_llm.env index a4d7be5f5..0df776702 100644 --- a/env_example/module_programming_llm.env +++ b/env_example/module_programming_llm.env @@ -2,10 +2,33 @@ PRODUCTION=1 SECRET=12345abcdef DATABASE_URL=postgresql://postgres:password@postgres:5432/athena -OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -# Can be empty if Azure API is not used -OPENAI_API_TYPE="azure" # change to "azure" if Azure is used -OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed -OPENAI_API_VERSION="2023-03-15-preview" # 
change version if needed -AZURE_DEPLOYMENT_NAME="gpt-35" # change to deployment name \ No newline at end of file +################################################################ +# LLM Credentials # +################################################################ + +# Default model to use +# See below for options, available models are also logged on startup +LLM_DEFAULT_MODEL="azure_openai_gpt-35" + +# Standard OpenAI (Non-Azure) [leave blank if not used] +# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003` +# A list of models can be found in `module_programming_llm/helpers/models/openai.py` (openai_models) +LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Azure OpenAI [leave blank if not used] +# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35` +LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed +LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed + +# Replicate [leave blank if not used] +# See https://replicate.com and adjust model config options in `module_programming_llm/helpers/models/replicate.py` +REPLICATE_API_TOKEN= + +# LangSmith (can be used for tracing LLMs) [leave blank if not used] +# See https://docs.smith.langchain.com +# LANGCHAIN_TRACING_V2=true +# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +# LANGCHAIN_API_KEY="XXX" +# LANGCHAIN_PROJECT="XXX" \ No newline at end of file diff --git a/module_programming_llm/.env.example b/module_programming_llm/.env.example index f7b7f048b..2b6d9a889 100644 --- a/module_programming_llm/.env.example +++ b/module_programming_llm/.env.example @@ -6,10 +6,32 @@ SECRET=12345abcdef DATABASE_URL=sqlite:///../data/data.sqlite -OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +################################################################ +# LLM Credentials # +################################################################ -# Can be empty if Azure API is not used -OPENAI_API_TYPE="azure" # change to "azure" if Azure is used -OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed -OPENAI_API_VERSION="2023-03-15-preview" # change version if needed -AZURE_DEPLOYMENT_NAME="gpt-35" # change to deployment name \ No newline at end of file +# Default model to use +# See below for options, available models are also logged on startup +LLM_DEFAULT_MODEL="azure_openai_gpt-35" + +# Standard OpenAI (Non-Azure) [leave blank if not used] +# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003` +# A list of models can be found in `module_programming_llm/helpers/models/openai.py` (openai_models) +LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Azure OpenAI [leave blank if not used] +# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. 
`azure_openai_gpt-35` +LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed +LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed + +# Replicate [leave blank if not used] +# See https://replicate.com and adjust model config options in `module_programming_llm/helpers/models/replicate.py` +REPLICATE_API_TOKEN= + +# LangSmith (can be used for tracing LLMs) [leave blank if not used] +# See https://docs.smith.langchain.com +# LANGCHAIN_TRACING_V2=true +# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +# LANGCHAIN_API_KEY="XXX" +# LANGCHAIN_PROJECT="XXX" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index ca08b7a04..e4fb070fe 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -1,50 +1,43 @@ from typing import List +import tiktoken + from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider -from athena.storage import store_exercise from athena.programming import Exercise, Submission, Feedback from athena.logger import logger +from module_programming_llm.config import Configuration -from module_programming_llm.basic.basic_feedback_provider import suggest_feedback as suggest_feedback_basic -from module_programming_llm.basic.file_instructions import generate_file_grading_instructions, generate_file_problem_statements +from module_programming_llm.generate_suggestions_by_file import generate_suggestions_by_file +from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed +from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed @submissions_consumer -def receive_submissions(exercise: Exercise, submissions: List[Submission]): +def receive_submissions(exercise: Exercise, submissions: List[Submission], module_config: Configuration): logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id) - # Split problem statements and grading instructions - exercise.meta['file_grading_instructions'] = generate_file_grading_instructions(exercise) - exercise.meta['file_problem_statements'] = generate_file_problem_statements(exercise) - - store_exercise(exercise) + # Split problem statements and grading instructions for later + generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) + generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) @submission_selector def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission: logger.info("select_submission: Received %d, submissions for exercise %d", len(submissions), exercise.id) - # Always return the first submission return submissions[0] @feedback_consumer def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]): - logger.info("process_feedback: Received feedbacks for submission %d of exercise %d.", submission.id, exercise.id) - logger.info("process_feedback: Feedbacks: %s", feedbacks) - # Do something with the feedback + logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", 
len(feedbacks), submission.id, exercise.id) @feedback_provider -async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: +async def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]: logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id, exercise.id) - # Do something with the submission and return a list of feedback - - # Check if file based grading instructions and problem statements are available - if 'file_grading_instructions' in exercise.meta and 'file_problem_statements' in exercise.meta: - return await suggest_feedback_basic(exercise, submission) - logger.info("suggest_feedback: No file based grading instructions and problem statements available. Skipping feedback generation.") - return [] + return await generate_suggestions_by_file(exercise, submission, module_config.approach, module_config.debug) if __name__ == "__main__": + tiktoken.get_encoding("cl100k_base") app.start() diff --git a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py index 33fc8564c..134fd0e41 100644 --- a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py +++ b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py @@ -1,110 +1,110 @@ -import json -from typing import List +# import json +# from typing import List -from langchain.chains import LLMChain -from langchain.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) +# from langchain.chains import LLMChain +# from langchain.prompts import ( +# ChatPromptTemplate, +# SystemMessagePromptTemplate, +# HumanMessagePromptTemplate, +# ) -from athena.programming import Exercise, Submission, Feedback -from athena.logger import logger +# from athena.programming import Exercise, Submission, Feedback +# from athena.logger import logger -from module_programming_llm.helpers.utils import get_diff, get_file_extension, load_files_from_repo, add_line_numbers -from module_programming_llm.helpers.models import chat +# from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers +# from module_programming_llm.helpers.models import chat -from .prompts.basic_feedback_provider import system_template, human_template +# from ..prompts.basic_feedback_provider import system_template, human_template -async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: - max_prompt_length = 2560 - input_list: List[dict] = [] +# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: +# max_prompt_length = 2560 +# input_list: List[dict] = [] - if exercise.meta['file_grading_instructions'] is None: - raise ValueError("No file grading instructions found for exercise in meta.") - if exercise.meta['file_problem_statements'] is None: - raise ValueError("No file problem statements found for exercise in meta.") +# if exercise.meta['file_grading_instructions'] is None: +# raise ValueError("No file grading instructions found for exercise in meta.") +# if exercise.meta['file_problem_statements'] is None: +# raise ValueError("No file problem statements found for exercise in meta.") - # Feature extraction - solution_repo = exercise.get_solution_repository() - template_repo = exercise.get_template_repository() - 
submission_repo = submission.get_repository() +# # Feature extraction +# solution_repo = exercise.get_solution_repository() +# template_repo = exercise.get_template_repository() +# submission_repo = submission.get_repository() - file_extension = get_file_extension(exercise.programming_language) - if file_extension is None: - raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") +# file_extension = get_programming_language_file_extension(exercise.programming_language) +# if file_extension is None: +# raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): - if submission_content is None: - continue +# for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): +# if submission_content is None: +# continue - problem_statement = exercise.meta['file_problem_statements'].get(file_path) - if problem_statement is None: - logger.info("No problem statement for %s, skipping.", file_path) - continue +# problem_statement = exercise.meta['file_problem_statements'].get(file_path) +# if problem_statement is None: +# logger.info("No problem statement for %s, skipping.", file_path) +# continue - grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) - if grading_instructions is None: - logger.info("No grading instructions for %s, skipping.", file_path) - continue +# grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) +# if grading_instructions is None: +# logger.info("No grading instructions for %s, skipping.", file_path) +# continue - submission_content = add_line_numbers(submission_content) - solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) - template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) +# submission_content = add_line_numbers(submission_content) +# solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) +# template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) - input_list.append({ - "file_path": file_path, - "submission_content": submission_content, - "solution_to_submission_diff": solution_to_submission_diff, - "template_to_submission_diff": template_to_submission_diff, - "grading_instructions": grading_instructions, - "problem_statement": problem_statement, - }) +# input_list.append({ +# "file_path": file_path, +# "submission_content": submission_content, +# "solution_to_submission_diff": solution_to_submission_diff, +# "template_to_submission_diff": template_to_submission_diff, +# "grading_instructions": grading_instructions, +# "problem_statement": problem_statement, +# }) - system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) - human_message_prompt = HumanMessagePromptTemplate.from_template(human_template) - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) +# 
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) +# human_message_prompt = HumanMessagePromptTemplate.from_template(human_template) +# chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - # Filter long prompts - input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] +# # Filter long prompts +# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - # Completion - chain = LLMChain(llm=chat, prompt=chat_prompt) - if not input_list: - return [] - result = await chain.agenerate(input_list) +# # Completion +# chain = LLMChain(llm=chat, prompt=chat_prompt) +# if not input_list: +# return [] +# result = await chain.agenerate(input_list) - # Parse result - feedback_proposals: List[Feedback] = [] - for input, generations in zip(input_list, result.generations): - file_path = input["file_path"] - for generation in generations: - try: - feedbacks = json.loads(generation.text) - except json.JSONDecodeError: - logger.error("Failed to parse feedback json: %s", generation.text) - continue - if not isinstance(feedbacks, list): - logger.error("Feedback json is not a list: %s", generation.text) - continue +# # Parse result +# feedback_proposals: List[Feedback] = [] +# for input, generations in zip(input_list, result.generations): +# file_path = input["file_path"] +# for generation in generations: +# try: +# feedbacks = json.loads(generation.text) +# except json.JSONDecodeError: +# logger.error("Failed to parse feedback json: %s", generation.text) +# continue +# if not isinstance(feedbacks, list): +# logger.error("Feedback json is not a list: %s", generation.text) +# continue - for feedback in feedbacks: - line = feedback.get("line", None) - description = feedback.get("text", None) - credits = feedback.get("credits", 0.0) - feedback_proposals.append( - Feedback( - id=None, - exercise_id=exercise.id, - submission_id=submission.id, - title="Feedback", - description=description, - file_path=file_path, - line_start=line, - line_end=None, - credits=credits, - meta={}, - ) - ) +# for feedback in feedbacks: +# line = feedback.get("line", None) +# description = feedback.get("text", None) +# credits = feedback.get("credits", 0.0) +# feedback_proposals.append( +# Feedback( +# id=None, +# exercise_id=exercise.id, +# submission_id=submission.id, +# title="Feedback", +# description=description, +# file_path=file_path, +# line_start=line, +# line_end=None, +# credits=credits, +# meta={}, +# ) +# ) - return feedback_proposals \ No newline at end of file +# return feedback_proposals \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/file_instructions.py b/module_programming_llm/module_programming_llm/basic/file_instructions.py deleted file mode 100644 index 961fa4f92..000000000 --- a/module_programming_llm/module_programming_llm/basic/file_instructions.py +++ /dev/null @@ -1,59 +0,0 @@ -import json - -from langchain.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) - -from athena.programming import Exercise -from athena.logger import logger - -from module_programming_llm.helpers.utils import get_diff, get_file_extension -from module_programming_llm.helpers.models import chat - -from .prompts.generate_file_grading_instructions import system_template as 
system_template_grading_instructions, human_template as human_template_grading_instructions -from .prompts.generate_file_problem_statements import system_template as system_template_problem_statements, human_template as human_template_problem_statements - -def generate_file_grading_instructions(exercise: Exercise): - grading_instructions = exercise.grading_instructions or "" - - solution_repo = exercise.get_solution_repository() - template_repo = exercise.get_template_repository() - file_extension = get_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - - - system_message_prompt = SystemMessagePromptTemplate.from_template(system_template_grading_instructions) - human_message_prompt = HumanMessagePromptTemplate.from_template(human_template_grading_instructions) - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - - result = chat(chat_prompt.format_prompt(grading_instructions=grading_instructions, changed_files=changed_files).to_messages()) - - try: - return json.loads(result.content) - except json.JSONDecodeError: - logger.error("Could not decode JSON response from chat: %s", result.content) - return None - - -def generate_file_problem_statements(exercise: Exercise): - problem_statement = exercise.problem_statement - - solution_repo = exercise.get_solution_repository() - template_repo = exercise.get_template_repository() - file_extension = get_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - system_message_prompt = SystemMessagePromptTemplate.from_template(system_template_problem_statements) - human_message_prompt = HumanMessagePromptTemplate.from_template(human_template_problem_statements) - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - - result = chat(chat_prompt.format_prompt(problem_statement=problem_statement, changed_files=changed_files).to_messages()) - - try: - return json.loads(result.content) - except json.JSONDecodeError: - logger.error("Could not decode JSON response from chat: %s", result.content) - return None \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py deleted file mode 100644 index 9294e4da3..000000000 --- a/module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py +++ /dev/null @@ -1,40 +0,0 @@ -system_template = """\ -You are a programming tutor AI at a university tasked with grading and providing feedback to programming homework assignments. - -You receive a submission with some other information and respond with the following JSON format: -[{{"text": , "credits": , "line": }}] -Extremely Important: The response should only contain the json object with the feedback, nothing else! - -Effective feedback for programming assignments should possess the following qualities: -1. Constructive: Provide guidance on how to improve the code, pointing out areas that can be optimized, refactored, or enhanced. -2. Specific: Highlight exact lines or sections of code that need attention, and suggest precise changes or improvements. -3. 
Balanced: Recognize and praise the positive aspects of the code, while also addressing areas for improvement, to encourage and motivate the student. -4. Clear and concise: Use straightforward language and avoid overly technical jargon, so that the student can easily understand the feedback. -5. Actionable: Offer practical suggestions for how the student can apply the feedback to improve their code, ensuring they have a clear path forward. -6. Educational: Explain the reasoning behind the suggestions, so the student can learn from the feedback and develop their programming skills. - -Example response: -[\ -{{"text": "Great use of the compareTo method for comparing Dates, which is the proper way to compare objects.", "credits": 3, "line": 14}},\ -{{"text": "Good job implementing the BubbleSort algorithm for sorting Dates. It shows a clear understanding of the sorting process", "credits": 5, "line": null}},\ -{{"text": "Incorrect use of \'==\' for string comparison, which leads to unexpected results. Use the \'equals\' method for string comparison instead.", "credits": -2, "line": 18}}\ -]\ -""" - -human_template = """\ -Student\'s submission to grade: -{submission_content} -Diff between solution (deletions) and student\'s submission (additions): -{solution_to_submission_diff} -Diff between template (deletions) and student\'s submission (additions): -{template_to_submission_diff} -Problem statement: -{problem_statement} -Grading instructions: -{grading_instructions} -As said, it should be effective feedback following an extremely high standard. -Critically grade the submission and distribute credits accordingly. -Be liberal with interpreting the grading instructions. - -JSON response: -""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py b/module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py deleted file mode 100644 index 8f4621bdf..000000000 --- a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py +++ /dev/null @@ -1,15 +0,0 @@ -system_template = """\ -You are a programming tutor AI at a university tasked with grading and providing feedback to programming homework assignments. - -You receive grading instructions and a list of changed files and respond in the following JSON format, associating each file with its grading instructions: -{{"": ""}} -""" - -human_template = """\ -Grading instructions: -{grading_instructions} -Changed files: -{changed_files} - -JSON response: -""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py b/module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py deleted file mode 100644 index 28886f8b9..000000000 --- a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py +++ /dev/null @@ -1,15 +0,0 @@ -system_template = """\ -You are a programming tutor AI at a university tasked with grading and providing feedback to programming homework assignments. 
- -You receive a overall problem statement and a list of changed files and respond in the following JSON format, associating each file with its file-specific problem statement: -{{"": ""}} -""" - -human_template = """\ -Problem statement: -{problem_statement} -Changed files: -{changed_files} - -JSON response: -""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py new file mode 100644 index 000000000..173ec1255 --- /dev/null +++ b/module_programming_llm/module_programming_llm/config.py @@ -0,0 +1,69 @@ +from pydantic import BaseModel, Field + +from athena import config_schema_provider +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig +from module_programming_llm.prompts.generate_suggestions_by_file import ( + system_template as generate_suggestions_by_file_system_template, + human_template as generate_suggestions_by_file_human_template +) +from module_programming_llm.prompts.split_grading_instructions_by_file import ( + system_template as split_grading_instructions_by_file_template, + human_template as split_grading_instructions_by_file_human_template +) +from module_programming_llm.prompts.split_problem_statement_by_file import ( + system_template as split_problem_statements_by_file_system_template, + human_template as split_problem_statements_by_file_human_template +) + + +class SplitProblemStatementsByFilePrompt(BaseModel): + """\ +Features available: **{problem_statement}**, **{changed_files}**\ + +*Note: `changed_files` are the changed files between template and solution repository.*\ +""" + system_message: str = Field(default=split_problem_statements_by_file_system_template, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=split_problem_statements_by_file_human_template, + description="Message from a human. The input on which the AI is supposed to act.") + + +class SplitGradingInstructionsByFilePrompt(BaseModel): + """\ +Features available: **{grading_instructions}**, **{changed_files}** + +*Note: `changed_files` are the changed files between template and solution repository.*\ +""" + system_message: str = Field(default=split_grading_instructions_by_file_template, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=split_grading_instructions_by_file_human_template, + description="Message from a human. The input on which the AI is supposed to act.") + + +class GenerationPrompt(BaseModel): + """\ +Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, \ +**{submission}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}** + +*Note: Prompt will be applied per file independently, submission is a single file.*\ +""" + system_message: str = Field(default=generate_suggestions_by_file_system_template, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=generate_suggestions_by_file_human_template, + description="Message from a human. 
The input on which the AI is supposed to act.") + + +class BasicApproachConfig(BaseModel): + """This approach uses a LLM with a single prompt to generate feedback in a single step.""" + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + + split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) + split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) + generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) + + +@config_schema_provider +class Configuration(BaseModel): + debug: bool = Field(default=False, description="Enable debug mode.") + approach: BasicApproachConfig = Field(default=BasicApproachConfig()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py new file mode 100644 index 000000000..bc93269eb --- /dev/null +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -0,0 +1,179 @@ +from typing import List, Optional, Sequence + +from pydantic import BaseModel, Field +from langchain.chains.openai_functions import create_structured_output_chain + +from athena import emit_meta +from athena.programming import Exercise, Submission, Feedback +from athena.logger import logger + +from module_programming_llm.config import BasicApproachConfig +from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed +from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed +from module_programming_llm.helpers.llm_utils import check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions +from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers + + +class FeedbackModel(BaseModel): + title: str = Field(..., description="Very short title, i.e. 
feedback category", example="Logic Error") + description: str = Field(..., description="Feedback description") + line_start: Optional[int] = Field(..., description="Referenced line number start, or empty if unreferenced") + line_end: Optional[int] = Field(..., description="Referenced line number end, or empty if unreferenced") + credits: float = Field(0.0, description="Number of points received/deducted") + + class Config: + title = "Feedback" + + +class AssessmentModel(BaseModel): + """Collection of feedbacks making up an assessment""" + + feedbacks: Sequence[FeedbackModel] = Field(..., description="Assessment feedbacks") + + class Config: + title = "Assessment" + + +# pylint: disable=too-many-locals +async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: + model = config.model.get_model() + + # Get split grading instructions + split_grading_instructions = generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=config, debug=debug) + file_grading_instructions = { item.file_name: item.grading_instructions for item in split_grading_instructions.instructions } + + # Get split problem statement + split_problem_statement = generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=config, debug=debug) + file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.problem_statements } + + prompt_inputs: List[dict] = [] + + # Feature extraction + solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() + submission_repo = submission.get_repository() + + file_extension = get_programming_language_file_extension(exercise.programming_language) + if file_extension is None: + raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") + + files = load_files_from_repo( + submission_repo, + file_filter=lambda x: x.endswith(file_extension) if file_extension else False + ) + + for file_path, content in files.items(): + if content is None: + continue + + problem_statement = file_problem_statements.get(file_path, "No relevant problem statement section found.") + grading_instructions = file_grading_instructions.get(file_path, "No relevant grading instructions found.") + + content = add_line_numbers(content) + solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) + + prompt_inputs.append({ + "file_path": file_path, + "submission": content, + "max_points": exercise.max_points, + "bonus_points": exercise.bonus_points, + "solution_to_submission_diff": solution_to_submission_diff, + "template_to_submission_diff": template_to_submission_diff, + "grading_instructions": grading_instructions, + "problem_statement": problem_statement, + }) + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_by_file_prompt.system_message, + human_message=config.generate_suggestions_by_file_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Filter long prompts (omitting features if necessary) + omittable_features = [ + "problem_statement", + "grading_instructions", + "template_to_submission_diff", + 
"solution_to_submission_diff" + ] + prompt_inputs = [ + omitted_prompt_input for omitted_prompt_input, should_run in + [check_prompt_length_and_omit_features_if_necessary( + prompt=chat_prompt, + prompt_input=prompt_input, + max_input_tokens=config.max_input_tokens, + omittable_features=omittable_features, + debug=debug + ) for prompt_input in prompt_inputs] + if should_run + ] + + chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) + if not prompt_inputs: + return [] + result = await chain.agenerate(prompt_inputs) + + logger.info("Generated result: %s ", result) + + return [] + # return predict_and_parse( + # model=model, + # chat_prompt=chat_prompt, + # prompt_input={ + # "grading_instructions": exercise.grading_instructions, + # "changed_files": changed_files + # }, + # pydantic_object=SplitGradingInstructions + # ) + + + + + +# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: + +# # Filter long prompts +# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] + +# # Completion +# chain = LLMChain(llm=chat, prompt=chat_prompt) +# if not input_list: +# return [] +# result = await chain.agenerate(input_list) + +# # Parse result +# feedback_proposals: List[Feedback] = [] +# for input, generations in zip(input_list, result.generations): +# file_path = input["file_path"] +# for generation in generations: +# try: +# feedbacks = json.loads(generation.text) +# except json.JSONDecodeError: +# logger.error("Failed to parse feedback json: %s", generation.text) +# continue +# if not isinstance(feedbacks, list): +# logger.error("Feedback json is not a list: %s", generation.text) +# continue + +# for feedback in feedbacks: +# line = feedback.get("line", None) +# description = feedback.get("text", None) +# credits = feedback.get("credits", 0.0) +# feedback_proposals.append( +# Feedback( +# id=None, +# exercise_id=exercise.id, +# submission_id=submission.id, +# title="Feedback", +# description=description, +# file_path=file_path, +# line_start=line, +# line_end=None, +# credits=credits, +# meta={}, +# ) +# ) + +# return feedback_proposals \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py new file mode 100644 index 000000000..53a300f00 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -0,0 +1,152 @@ +from typing import Type, TypeVar, List +from pydantic import BaseModel + +import tiktoken + +from langchain.chains import LLMChain +from langchain.chat_models import ChatOpenAI +from langchain.base_language import BaseLanguageModel +from langchain.prompts import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) +from langchain.output_parsers import PydanticOutputParser, OutputFixingParser +from langchain.chains.openai_functions import create_structured_output_chain + +from athena import emit_meta + +T = TypeVar("T", bound=BaseModel) + + +def num_tokens_from_string(string: str) -> int: + """Returns the number of tokens in a text string.""" + encoding = tiktoken.get_encoding("cl100k_base") + num_tokens = len(encoding.encode(string)) + return num_tokens + + +def num_tokens_from_prompt(chat_prompt: ChatPromptTemplate, prompt_input: dict) -> int: + """Returns the number of tokens in a chat prompt.""" + return 
num_tokens_from_string(chat_prompt.format(**prompt_input)) + + +def check_prompt_length_and_omit_features_if_necessary(prompt: ChatPromptTemplate, + prompt_input: dict, + max_input_tokens: int, + omittable_features: List[str], + debug: bool): + """Check if the input is too long and omit features if necessary. + + Note: Omitted features will be replaced with "omitted" in the prompt + + Args: + prompt (ChatPromptTemplate): Prompt template + prompt_input (dict): Prompt input + max_input_tokens (int): Maximum number of tokens allowed + omittable_features (List[str]): List of features that can be omitted, ordered by priority (least important first) + debug (bool): Debug flag + + Returns: + (dict, bool): Tuple of (prompt_input, should_run) where prompt_input is the input with omitted features and + should_run is True if the model should run, False otherwise + """ + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + return prompt_input, True + + omitted_features = [] + + # Omit features until the input is short enough + for feature in omittable_features: + if feature in prompt_input: + omitted_features.append(feature) + prompt_input[feature] = "omitted" + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + if debug: + emit_meta("omitted_features", omitted_features) + return prompt_input, True + + # If we get here, we couldn't omit enough features + return prompt_input, False + + +def supports_function_calling(model: BaseLanguageModel): + """Returns True if the model supports function calling, False otherwise + + Args: + model (BaseLanguageModel): The model to check + + Returns: + boolean: True if the model supports function calling, False otherwise + """ + return isinstance(model, ChatOpenAI) + + +def get_chat_prompt_with_formatting_instructions( + model: BaseLanguageModel, + system_message: str, + human_message: str, + pydantic_object: Type[T] + ) -> ChatPromptTemplate: + """Returns a ChatPromptTemplate with formatting instructions (if necessary) + + Note: Does nothing if the model supports function calling + + Args: + model (BaseLanguageModel): The model to check if it supports function calling + system_message (str): System message + human_message (str): Human message + pydantic_object (Type[T]): Model to parse the output + + Returns: + ChatPromptTemplate: ChatPromptTemplate with formatting instructions (if necessary) + """ + if supports_function_calling(model): + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message) + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message) + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") + system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} + system_message_prompt.prompt.input_variables.remove("format_instructions") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON Response:") + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + +def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): + """Predicts and parses the output of the model + + Args: + model (BaseLanguageModel): The model to predict with + chat_prompt (ChatPromptTemplate): Prompt 
to use + prompt_input (dict): Input parameters to use for the prompt + pydantic_object (Type[T]): Pydantic model to parse the output + """ + if supports_function_calling(model): + chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) + return chain.run(**prompt_input) + + output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) + chain = LLMChain(llm=model, prompt=chat_prompt) + output = chain.run(**prompt_input) + return output_parser.parse(output) + + +async def agenerate_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): + """Generates and parses the output of the model + + Args: + model (BaseLanguageModel): The model to generate with + chat_prompt (ChatPromptTemplate): Prompt to use + prompt_input (dict): Input parameters to use for the prompt + pydantic_object (Type[T]): Pydantic model to parse the output + """ + if supports_function_calling(model): + chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) + return chain.run(**prompt_input) + + output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) + chain = LLMChain(llm=model, prompt=chat_prompt) + output = chain.run(**prompt_input) + return output_parser.parse(output) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/models.py b/module_programming_llm/module_programming_llm/helpers/models.py deleted file mode 100644 index 70826a454..000000000 --- a/module_programming_llm/module_programming_llm/helpers/models.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from langchain.chat_models import AzureChatOpenAI, ChatOpenAI -import openai - -OPENAI_API_TYPE = os.environ.get("OPENAI_API_TYPE") - -# Validate environment variables -if "OPENAI_API_KEY" not in os.environ: - raise EnvironmentError("OPENAI_API_KEY environment variable not set.") -if OPENAI_API_TYPE == "azure": - if "OPENAI_API_BASE" not in os.environ: - raise EnvironmentError("OPENAI_API_TYPE=azure but OPENAI_API_BASE environment variable not set.") - if "OPENAI_API_VERSION" not in os.environ: - raise EnvironmentError("OPENAI_API_TYPE=azure but OPENAI_API_VERSION environment variable not set.") - if "AZURE_DEPLOYMENT_NAME" not in os.environ: - raise EnvironmentError("OPENAI_API_TYPE=azure but AZURE_DEPLOYMENT_NAME environment variable not set.") - - AZURE_DEPLOYMENT_NAME = os.environ["AZURE_DEPLOYMENT_NAME"] - - # Check if deployment exists - openai.api_type = "azure" - openai.api_key = os.environ["OPENAI_API_KEY"] - openai.api_base = os.environ["OPENAI_API_BASE"] - openai.api_version = os.environ["OPENAI_API_VERSION"] - - deployments = openai.Deployment.list().data # type: ignore - deployment_ids = [deployment.id for deployment in deployments] - if AZURE_DEPLOYMENT_NAME not in deployment_ids: - deployments = [{ "id": deployment.id, "model": deployment.model } for deployment in deployments] - raise EnvironmentError(f"Deployment id '{AZURE_DEPLOYMENT_NAME}' not found, available deployments: {deployments}") - - # Initialize azure chat model - chat = AzureChatOpenAI(deployment_name=AZURE_DEPLOYMENT_NAME, client="", temperature=0) -else: - # Initialize openai chat model - chat = ChatOpenAI(client="", temperature=0) diff --git a/module_programming_llm/module_programming_llm/helpers/models/__init__.py b/module_programming_llm/module_programming_llm/helpers/models/__init__.py new file mode 100644 
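For orientation, the helpers in llm_utils.py are meant to be combined by callers such as generate_suggestions_by_file: build the chat prompt (formatting instructions are only appended when the model cannot use OpenAI function calling), trim over-long inputs by omitting low-priority features, then predict and parse into a pydantic model. A minimal sketch of that flow, assuming a hypothetical ExampleOutput schema and an already-configured model obtained from a ModelConfig:

from typing import Sequence
from pydantic import BaseModel, Field
from module_programming_llm.helpers.llm_utils import (
    check_prompt_length_and_omit_features_if_necessary,
    get_chat_prompt_with_formatting_instructions,
    predict_and_parse,
)

class ExampleOutput(BaseModel):
    # Hypothetical output schema, used only for this sketch
    items: Sequence[str] = Field(..., description="Example items")

def run_sketch(model, system_message: str, human_message: str, prompt_input: dict):
    # Prompt with formatting instructions appended only for non-function-calling models
    chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=system_message,
        human_message=human_message,
        pydantic_object=ExampleOutput,
    )
    # Omit the least important features first if the prompt exceeds the token budget
    prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
        prompt=chat_prompt,
        prompt_input=prompt_input,
        max_input_tokens=3000,
        omittable_features=["problem_statement"],
        debug=False,
    )
    if not should_run:
        return None
    # Run the model and parse the result into ExampleOutput
    return predict_and_parse(model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, pydantic_object=ExampleOutput)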
index 000000000..f5ab68a2f --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/__init__.py @@ -0,0 +1,36 @@ +import os +from typing import Type, Union, List +from module_programming_llm.helpers.models.model_config import ModelConfig + + +DefaultModelConfig: Type[ModelConfig] +default_model_name = os.environ.get("LLM_DEFAULT_MODEL") + +types: List[Type[ModelConfig]] = [] +try: + import module_programming_llm.helpers.models.openai as openai_config + types.append(openai_config.OpenAIModelConfig) + if default_model_name in openai_config.available_models: + DefaultModelConfig = openai_config.OpenAIModelConfig +except AttributeError: + pass + +try: + import module_programming_llm.helpers.models.replicate as replicate_config + types.append(replicate_config.ReplicateModelConfig) + if default_model_name in replicate_config.available_models: + DefaultModelConfig = replicate_config.ReplicateModelConfig +except AttributeError: + pass + +if not types: + raise EnvironmentError( + "No model configurations available, please set up at least one provider in the environment variables.") + +if 'DefaultModelConfig' not in globals(): + DefaultModelConfig = types[0] + +if len(types) == 1: + ModelConfigType = types[0] +else: + ModelConfigType = Union[tuple(types)] # type: ignore diff --git a/module_programming_llm/module_programming_llm/helpers/models/model_config.py b/module_programming_llm/module_programming_llm/helpers/models/model_config.py new file mode 100644 index 000000000..f433ab587 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/model_config.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod +from pydantic import BaseModel +from langchain.base_language import BaseLanguageModel + + +class ModelConfig(BaseModel, ABC): + + @abstractmethod + def get_model(self) -> BaseLanguageModel: + pass diff --git a/module_programming_llm/module_programming_llm/helpers/models/openai.py b/module_programming_llm/module_programming_llm/helpers/models/openai.py new file mode 100644 index 000000000..90cd24795 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/openai.py @@ -0,0 +1,321 @@ +import os +from contextlib import contextmanager +from typing import Any, Callable, Dict, List +from pydantic import Field, validator, PositiveInt +from enum import Enum + +import openai +from langchain.chat_models import AzureChatOpenAI, ChatOpenAI +from langchain.llms import AzureOpenAI, OpenAI +from langchain.llms.openai import BaseOpenAI +from langchain.base_language import BaseLanguageModel + +from athena.logger import logger +from .model_config import ModelConfig + + +OPENAI_PREFIX = "openai_" +AZURE_OPENAI_PREFIX = "azure_openai_" + + +######################################################################### +# Monkey patching openai/langchain api # +# ===================================================================== # +# This allows us to have multiple api keys i.e. mixing # +# openai and azure openai api keys so we can use not only deployed # +# models but also models from the non-azure openai api. # +# This is mostly for testing purposes, in production we can just deploy # +# the models to azure that we want to use. 
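As an aside, the ModelConfig base class defined above only requires implementing get_model(); a provider-specific configuration can therefore be as small as the following illustrative sketch (it uses LangChain's FakeListLLM purely as a stand-in and is not part of this module):

from langchain.base_language import BaseLanguageModel
from langchain.llms.fake import FakeListLLM
from module_programming_llm.helpers.models.model_config import ModelConfig

class CannedModelConfig(ModelConfig):
    # Illustrative only: always answers with a fixed response
    response: str = "ok"

    def get_model(self) -> BaseLanguageModel:
        return FakeListLLM(responses=[self.response])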
# +######################################################################### + +def _wrap(old: Any, new: Any) -> Callable: + def repl(*args: Any, **kwargs: Any) -> Any: + new(args[0]) # args[0] is self + return old(*args, **kwargs) + return repl + + +def _async_wrap(old: Any, new: Any): + async def repl(*args, **kwargs): + new(args[0]) # args[0] is self + return await old(*args, **kwargs) + return repl + + +def _set_credentials(self): + openai.api_key = self.openai_api_key + + api_type = "open_ai" + api_base = "https://api.openai.com/v1" + api_version = None + if hasattr(self, "openai_api_type"): + api_type = self.openai_api_type + + if api_type == "azure": + if hasattr(self, "openai_api_base"): + api_base = self.openai_api_base + if hasattr(self, "openai_api_version"): + api_version = self.openai_api_version + + openai.api_type = api_type + openai.api_base = api_base + openai.api_version = api_version + + +# Monkey patching langchain +# pylint: disable=protected-access +ChatOpenAI._generate = _wrap(ChatOpenAI._generate, _set_credentials) # type: ignore +ChatOpenAI._agenerate = _async_wrap(ChatOpenAI._agenerate, _set_credentials) # type: ignore +BaseOpenAI._generate = _wrap(BaseOpenAI._generate, _set_credentials) # type: ignore +BaseOpenAI._agenerate = _async_wrap(BaseOpenAI._agenerate, _set_credentials) # type: ignore +# pylint: enable=protected-access + +######################################################################### +# Monkey patching end # +######################################################################### + + +def _use_azure_credentials(): + openai.api_type = "azure" + openai.api_key = os.environ.get("LLM_AZURE_OPENAI_API_KEY") + openai.api_base = os.environ.get("LLM_AZURE_OPENAI_API_BASE") + # os.environ.get("LLM_AZURE_OPENAI_API_VERSION") + openai.api_version = "2023-03-15-preview" + + +def _use_openai_credentials(): + openai.api_type = "open_ai" + openai.api_key = os.environ.get("LLM_OPENAI_API_KEY") + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + + +openai_available = len(os.environ.get("LLM_OPENAI_API_KEY") or "") > 0 +azure_openai_available = len(os.environ.get("LLM_AZURE_OPENAI_API_KEY") or "") > 0 + + +# This is a hack to make sure that the openai api is set correctly +# Right now it is overkill, but it will be useful when the api gets fixed and we no longer +# hardcode the model names (i.e. 
OpenAI fixes their api) +@contextmanager +def _openai_client(use_azure_api: bool, is_preference: bool): + """Set the openai client to use the correct api type, if available + + Args: + use_azure_api (bool): If true, use the azure api, else use the openai api + is_preference (bool): If true, it can fall back to the other api if the preferred one is not available + """ + if use_azure_api: + if azure_openai_available: + _use_azure_credentials() + elif is_preference and openai_available: + _use_openai_credentials() + elif is_preference: + raise EnvironmentError( + "No OpenAI api available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and " + "LLM_AZURE_OPENAI_API_VERSION environment variables or LLM_OPENAI_API_KEY environment variable" + ) + else: + raise EnvironmentError( + "Azure OpenAI api not available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and " + "LLM_AZURE_OPENAI_API_VERSION environment variables" + ) + else: + if openai_available: + _use_openai_credentials() + elif is_preference and azure_openai_available: + _use_azure_credentials() + elif is_preference: + raise EnvironmentError( + "No OpenAI api available, please set LLM_OPENAI_API_KEY environment variable or LLM_AZURE_OPENAI_API_KEY, " + "LLM_AZURE_OPENAI_API_BASE and LLM_AZURE_OPENAI_API_VERSION environment variables" + ) + else: + raise EnvironmentError( + "OpenAI api not available, please set LLM_OPENAI_API_KEY environment variable" + ) + + # API client is setup correctly + yield + + +def _get_available_deployments(openai_models: Dict[str, List[str]], model_aliases: Dict[str, str]): + available_deployments: Dict[str, Dict[str, Any]] = { + "chat_completion": {}, + "completion": {}, + "fine_tuneing": {}, + } + + if azure_openai_available: + with _openai_client(use_azure_api=True, is_preference=False): + deployments = openai.Deployment.list().get("data") or [] # type: ignore + for deployment in deployments: + model_name = deployment.model + if model_name in model_aliases: + model_name = model_aliases[model_name] + if model_name in openai_models["chat_completion"]: + available_deployments["chat_completion"][deployment.id] = deployment + elif model_name in openai_models["completion"]: + available_deployments["completion"][deployment.id] = deployment + elif model_name in openai_models["fine_tuneing"]: + available_deployments["fine_tuneing"][deployment.id] = deployment + + return available_deployments + + +def _get_available_models(openai_models: Dict[str, List[str]], + available_deployments: Dict[str, Dict[str, Any]]): + available_models: Dict[str, BaseLanguageModel] = {} + + if openai_available: + openai_api_key = os.environ["LLM_OPENAI_API_KEY"] + for model_name in openai_models["chat_completion"]: + available_models[OPENAI_PREFIX + model_name] = ChatOpenAI( + model=model_name, openai_api_key=openai_api_key, client="") + for model_name in openai_models["completion"]: + available_models[OPENAI_PREFIX + model_name] = OpenAI( + model=model_name, openai_api_key=openai_api_key, client="") + + if azure_openai_available: + azure_openai_api_key = os.environ["LLM_AZURE_OPENAI_API_KEY"] + azure_openai_api_base = os.environ["LLM_AZURE_OPENAI_API_BASE"] + azure_openai_api_version = os.environ["LLM_AZURE_OPENAI_API_VERSION"] + + for model_type, Model in [("chat_completion", AzureChatOpenAI), ("completion", AzureOpenAI)]: + for deployment_name, deployment in available_deployments[model_type].items(): + available_models[AZURE_OPENAI_PREFIX + deployment_name] = Model( + model=deployment.model, + 
deployment_name=deployment_name, + openai_api_base=azure_openai_api_base, + openai_api_version=azure_openai_api_version, + openai_api_key=azure_openai_api_key, + client="", + ) + + return available_models + + +_model_aliases = { + "gpt-35-turbo": "gpt-3.5-turbo", +} + +# Hardcoded because openai can't provide a trustworthly api to get the list of models and capabilities... +openai_models = { + "chat_completion": [ + "gpt-4", + # "gpt-4-32k", # Not publicly available + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k" + ], + "completion": [ + "text-davinci-003", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + "fine_tuneing": [ + "davinci", + "curie", + "babbage", + "ada", + ] +} +available_deployments = _get_available_deployments(openai_models, _model_aliases) +available_models = _get_available_models(openai_models, available_deployments) + +if available_models: + logger.info("Available openai models: %s", ", ".join(available_models.keys())) + + OpenAIModel = Enum('OpenAIModel', {name: name for name in available_models}) # type: ignore + + + default_model_name = "gpt-3.5-turbo" + if "LLM_DEFAULT_MODEL" in os.environ and os.environ["LLM_DEFAULT_MODEL"] in available_models: + default_model_name = os.environ["LLM_DEFAULT_MODEL"] + if default_model_name not in available_models: + default_model_name = list(available_models.keys())[0] + + default_openai_model = OpenAIModel[default_model_name] + + + # Long descriptions will be displayed in the playground UI and are copied from the OpenAI docs + class OpenAIModelConfig(ModelConfig): + """OpenAI LLM configuration.""" + + model_name: OpenAIModel = Field(default=default_openai_model, # type: ignore + description="The name of the model to use.") + max_tokens: PositiveInt = Field(1000, description="""\ +The maximum number of [tokens](https://platform.openai.com/tokenizer) to generate in the chat completion. + +The total length of input tokens and generated tokens is limited by the model's context length. \ +[Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) for counting tokens.\ +""") + + temperature: float = Field(default=0.0, ge=0, le=2, description="""\ +What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, \ +while lower values like 0.2 will make it more focused and deterministic. + +We generally recommend altering this or `top_p` but not both.\ +""") + + top_p: float = Field(default=1, ge=0, le=1, description="""\ +An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. \ +So 0.1 means only the tokens comprising the top 10% probability mass are considered. + +We generally recommend altering this or `temperature` but not both.\ +""") + + presence_penalty: float = Field(default=0, ge=-2, le=2, description="""\ +Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, \ +increasing the model's likelihood to talk about new topics. + +[See more information about frequency and presence penalties.](https://platform.openai.com/docs/api-reference/parameter-details)\ +""") + + frequency_penalty: float = Field(default=0, ge=-2, le=2, description="""\ +Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, \ +decreasing the model's likelihood to repeat the same line verbatim. 
+ +[See more information about frequency and presence penalties.](https://platform.openai.com/docs/api-reference/parameter-details)\ +""") + + @validator('max_tokens') + def max_tokens_must_be_positive(cls, v): + """ + Validate that max_tokens is a positive integer. + """ + if v <= 0: + raise ValueError('max_tokens must be a positive integer') + return v + + def get_model(self) -> BaseLanguageModel: + """Get the model from the configuration. + + Returns: + BaseLanguageModel: The model. + """ + model = available_models[self.model_name.value] + kwargs = model._lc_kwargs + secrets = {secret: getattr(model, secret) for secret in model.lc_secrets.keys()} + kwargs.update(secrets) + + model_kwargs = kwargs.get("model_kwargs", {}) + for attr, value in self.dict().items(): + if attr == "model_name": + # Skip model_name + continue + if hasattr(model, attr): + # If the model has the attribute, add it to kwargs + kwargs[attr] = value + else: + # Otherwise, add it to model_kwargs (necessary for chat models) + model_kwargs[attr] = value + kwargs["model_kwargs"] = model_kwargs + + # Initialize a copy of the model using the config + model = model.__class__(**kwargs) + return model + + + class Config: + title = 'OpenAI' \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/models/replicate.py b/module_programming_llm/module_programming_llm/helpers/models/replicate.py new file mode 100644 index 000000000..a706b8247 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/replicate.py @@ -0,0 +1,110 @@ +import os +from pydantic import Field, PositiveInt +from enum import Enum + +from langchain.llms import Replicate +from langchain.base_language import BaseLanguageModel + +from athena.logger import logger +from .model_config import ModelConfig + + +# Hardcoded list of models +replicate_models = { + # LLAMA 2 70B Chat + # https://replicate.com/replicate/llama-2-70b-chat + "llama-2-70b-chat": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", + # LLaMA 2 13B Chat + # https://replicate.com/a16z-infra/llama-2-13b-chat + "llama-2-13b-chat": "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52", + # LLaMA 2 7B Chat + # https://replicate.com/a16z-infra/llama-2-7b-chat + "llama-2-7b-chat": "a16z-infra/llama-2-7b-chat:7b0bfc9aff140d5b75bacbed23e91fd3c34b01a1e958d32132de6e0a19796e2c", +} + +available_models = {} +if len(os.environ.get("REPLICATE_API_TOKEN") or "") > 0: + available_models = { + name: Replicate( + model=model, + ) + for name, model in replicate_models.items() + } +else: + logger.warning("REPLICATE_API_TOKEN not found in environment variables. 
Replicate models are disabled.") + +if available_models: + logger.info("Available replicate models: %s", + ", ".join(available_models.keys())) + + ReplicateModel = Enum('ReplicateModel', {name: name for name in available_models}) # type: ignore + + + default_model_name = "llama-2-13b-chat" + if "LLM_DEFAULT_MODEL" in os.environ and os.environ["LLM_DEFAULT_MODEL"] in available_models: + default_model_name = os.environ["LLM_DEFAULT_MODEL"] + if default_model_name not in available_models: + default_model_name = list(available_models.keys())[0] + + default_replicate_model = ReplicateModel[default_model_name] + + + # Note: Config has been setup with LLaMA 2 chat models in mind, other models may not work as expected + class ReplicateModelConfig(ModelConfig): + """Replicate LLM configuration.""" + + model_name: ReplicateModel = Field(default=default_replicate_model, # type: ignore + description="The name of the model to use.") + max_new_tokens: PositiveInt = Field(1000, description="""\ +Maximum number of tokens to generate. A word is generally 2-3 tokens (minimum: 1)\ +""") + min_new_tokens: int = Field(-1, description="""\ +Minimum number of tokens to generate. To disable, set to -1. A word is generally 2-3 tokens. (minimum: -1)\ +""") + temperature: float = Field(default=0.01, ge=0.01, le=5, description="""\ +Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value.\ +(minimum: 0.01; maximum: 5)\ +""") + top_p: float = Field(default=1, ge=0, le=1, description="""\ +When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens (maximum: 1)\ +""") + top_k: PositiveInt = Field(default=250, description="""\ +When decoding text, samples from the top k most likely tokens; lower to ignore less likely tokens\ +""") + repetition_penalty: float = Field(default=1, ge=0.01, le=5, description="""\ +Penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, \ +less than 1 encourage it. (minimum: 0.01; maximum: 5)\ +""") + repetition_penalty_sustain: int = Field(default=-1, description=""" +Number of most recent tokens to apply repetition penalty to, -1 to apply to whole context (minimum: -1)\ +""") + token_repetition_penalty_decay: PositiveInt = Field(default=128, description="""\ +Gradually decrease penalty over this many tokens (minimum: 1)\ +""") + + def get_model(self) -> BaseLanguageModel: + """Get the model from the configuration. + + Returns: + BaseLanguageModel: The model. 
+ """ + model = available_models[self.model_name.value] + kwargs = model._lc_kwargs + + input_kwargs = {} + for attr, value in self.dict().items(): + # Skip model_name + if attr == "model_name": + continue + input_kwargs[attr] = value + + # Set model parameters + kwargs["input"] = input_kwargs + + # Initialize a copy of the model using the config + model = model.__class__(**kwargs) + return model + + + class Config: + title = 'Replicate' diff --git a/module_programming_llm/module_programming_llm/helpers/utils.py b/module_programming_llm/module_programming_llm/helpers/utils.py index a85cfa997..8f6c67ab8 100644 --- a/module_programming_llm/module_programming_llm/helpers/utils.py +++ b/module_programming_llm/module_programming_llm/helpers/utils.py @@ -34,7 +34,7 @@ def add_line_numbers(content: str) -> str: ) -def get_file_extension(programming_language: str) -> str | None: +def get_programming_language_file_extension(programming_language: str) -> str | None: # JAVA, C, OCAML, HASKELL, PYTHON, SWIFT, VHDL, ASSEMBLER, EMPTY, KOTLIN file_extensions = { "JAVA": ".java", diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py new file mode 100644 index 000000000..f1a300ae0 --- /dev/null +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -0,0 +1,27 @@ +system_template = """\ +You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. + +VERY IMPORTANT: Effective feedback for text assignments should be: +1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual\ +""" + +human_template = """\ +Problem statement: +{problem_statement} + +Example solution: +{example_solution} + +Grading instructions: +{grading_instructions} +Max points: {max_points}, bonus points: {bonus_points} + +Student\'s submission to grade (with line numbers : ): +{submission} + +Diff between solution (deletions) and student\'s submission (additions): +{solution_to_submission_diff} + +Diff between template (deletions) and student\'s submission (additions): +{template_to_submission_diff} +""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py new file mode 100644 index 000000000..7c0cc685d --- /dev/null +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -0,0 +1,13 @@ +system_template = """\ +You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. + +Restructure the grading instructions by changed file. +""" + +human_template = """\ +Grading instructions: +{grading_instructions} + +Changed files: +{changed_files} +""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py new file mode 100644 index 000000000..a1ee99f9f --- /dev/null +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -0,0 +1,13 @@ +system_template = """\ +You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. 
+ +Restructure the problem statement by changed file. +""" + +human_template = """\ +Problem statement: +{problem_statement} + +Changed files: +{changed_files} +""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py new file mode 100644 index 000000000..fefa93e14 --- /dev/null +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -0,0 +1,95 @@ +from typing import Optional, Sequence +from athena import emit_meta + +from pydantic import BaseModel, Field + +from athena.programming import Exercise +from athena.storage import store_exercise + +from module_programming_llm.config import BasicApproachConfig +from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse +from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension + + +FILE_GRADING_INSTRUCTIONS_KEY = "file_grading_instructions" + + +class FileGradingInstruction(BaseModel): + file_name: str = Field(..., description="File name") + grading_instructions: str = Field(..., description="Grading instructions relevant for this file") + + +class SplitGradingInstructions(BaseModel): + """Collection of grading instructions split by file""" + instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + + +def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: + """Split the general grading instructions by file + + Args: + exercise (Exercise): Exercise to split the grading instructions for + config (BasicApproachConfig): Configuration + + Returns: + SplitGradingInstructions: Grading instructions split by file, empty if input was too long + """ + if exercise.grading_instructions is None or exercise.grading_instructions.strip() == "": + return SplitGradingInstructions(instructions=[]) + + model = config.model.get_model() + + solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() + file_extension = get_programming_language_file_extension(exercise.programming_language) or "" + changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.split_grading_instructions_by_file_prompt.system_message, + human_message=config.split_grading_instructions_by_file_prompt.human_message, + pydantic_object=SplitGradingInstructions + ) + + prompt_input = { + "grading_instructions": exercise.grading_instructions, + "changed_files": changed_files + } + + # If the input is too long, return an empty SplitGradingInstructions object + prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) + if prompt_length > config.max_input_tokens: + if debug: + emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") + return SplitGradingInstructions(instructions=[]) + + split_grading_instructions = predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=SplitGradingInstructions + ) + + if debug: + emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_data", split_grading_instructions.dict()) + + return split_grading_instructions + + +def 
generate_and_store_split_grading_instructions_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: + """Generate and store the split grading instructions if needed + + Args: + exercise (Exercise): Exercise to get the split grading instructions for + config (BasicApproachConfig): Configuration + + Returns: + SplitGradingInstructions: Grading instructions split by file + """ + if FILE_GRADING_INSTRUCTIONS_KEY in exercise.meta: + return SplitGradingInstructions.parse_obj(exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY]) + + split_grading_instructions = split_grading_instructions_by_file(exercise=exercise, config=config, debug=debug) + exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY] = split_grading_instructions.dict() + store_exercise(exercise) + return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py new file mode 100644 index 000000000..8329e5749 --- /dev/null +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -0,0 +1,95 @@ +from typing import Sequence +from athena import emit_meta + +from pydantic import BaseModel, Field + +from athena.programming import Exercise +from athena.storage import store_exercise + +from module_programming_llm.config import BasicApproachConfig +from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse +from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension + + +FILE_PROBLEM_STATEMETS_KEY = "file_problem_statements" + + +class FileProblemStatement(BaseModel): + file_name: str = Field(..., description="File name") + problem_statement: str = Field(..., description="Problem statement relevant for this file") + + +class SplitProblemStatement(BaseModel): + """Collection of problem statements split by file""" + problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + + +def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: + """Split the general problem statement by file + + Args: + exercise (Exercise): Exercise to split the problem statement for + config (BasicApproachConfig): Configuration + + Returns: + SplitProblemStatement: Problem statement split by file, empty if input was too long + """ + if exercise.problem_statement.strip() == "": + return SplitProblemStatement(problem_statements=[]) + + model = config.model.get_model() + + solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() + file_extension = get_programming_language_file_extension(exercise.programming_language) or "" + changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.split_problem_statement_by_file_prompt.system_message, + human_message=config.split_problem_statement_by_file_prompt.human_message, + pydantic_object=SplitProblemStatement + ) + + prompt_input = { + "problem_statement": exercise.problem_statement, + "changed_files": changed_files + } + + # If the input is too long, return an empty SplitProblemStatement object + prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) + if prompt_length > 
config.max_input_tokens: + if debug: + emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") + return SplitProblemStatement(problem_statements=[]) + + split_problem_statement = predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=SplitProblemStatement + ) + + if debug: + emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_data", split_problem_statement.dict()) + + return split_problem_statement + + +def generate_and_store_split_problem_statement_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: + """Generate and store the split problem statement if needed + + Args: + exercise (Exercise): Exercise to split the problem statement for + config (BasicApproachConfig): Configuration + + Returns: + SplitProblemStatement: Problem statement split by file + """ + if FILE_PROBLEM_STATEMETS_KEY in exercise.meta: + return SplitProblemStatement.parse_obj(exercise.meta[FILE_PROBLEM_STATEMETS_KEY]) + + split_problem_statement = split_problem_statement_by_file(exercise=exercise, config=config, debug=debug) + exercise.meta[FILE_PROBLEM_STATEMETS_KEY] = split_problem_statement.dict() + store_exercise(exercise) + return split_problem_statement diff --git a/module_programming_llm/poetry.lock b/module_programming_llm/poetry.lock index c0f838c48..008a22564 100644 --- a/module_programming_llm/poetry.lock +++ b/module_programming_llm/poetry.lock @@ -146,6 +146,26 @@ doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd- test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] trio = ["trio (<0.22)"] +[[package]] +name = "astroid" +version = "2.15.6" +description = "An abstract syntax tree for Python with inference support." 
+category = "dev" +optional = false +python-versions = ">=3.7.2" +files = [ + {file = "astroid-2.15.6-py3-none-any.whl", hash = "sha256:389656ca57b6108f939cf5d2f9a2a825a3be50ba9d589670f393236e0a03b91c"}, + {file = "astroid-2.15.6.tar.gz", hash = "sha256:903f024859b7c7687d7a7f3a3f73b17301f8e42dfd9cc9df9d4418172d3e2dbd"}, +] + +[package.dependencies] +lazy-object-proxy = ">=1.4.0" +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} +wrapt = [ + {version = ">=1.11,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, +] + [[package]] name = "async-timeout" version = "4.0.3" @@ -343,6 +363,33 @@ typing-inspect = ">=0.4.0" [package.extras] dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (>=7.2.0)", "setuptools", "simplejson", "twine", "types-dataclasses", "wheel"] +[[package]] +name = "dill" +version = "0.3.7" +description = "serialize all of Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, + {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "dodgy" +version = "0.2.1" +description = "Dodgy: Searches for dodgy looking lines in Python code" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "dodgy-0.2.1-py3-none-any.whl", hash = "sha256:51f54c0fd886fa3854387f354b19f429d38c04f984f38bc572558b703c0542a6"}, + {file = "dodgy-0.2.1.tar.gz", hash = "sha256:28323cbfc9352139fdd3d316fa17f325cc0e9ac74438cbba51d70f9b48f86c3a"}, +] + [[package]] name = "exceptiongroup" version = "1.1.3" @@ -380,6 +427,38 @@ dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (> doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"] test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"] +[[package]] +name = "flake8" +version = "2.3.0" +description = "the modular source code checker: pep8, pyflakes and co" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "flake8-2.3.0-py2.py3-none-any.whl", hash = "sha256:c99cc9716d6655d9c8bcb1e77632b8615bf0abd282d7abd9f5c2148cad7fc669"}, + {file = "flake8-2.3.0.tar.gz", hash = "sha256:5ee1a43ccd0716d6061521eec6937c983efa027793013e572712c4da55c7c83e"}, +] + +[package.dependencies] +mccabe = ">=0.2.1" +pep8 = ">=1.5.7" +pyflakes = ">=0.8.1" + +[[package]] +name = "flake8-polyfill" +version = "1.0.2" +description = "Polyfill package for Flake8 plugins" +category = "dev" +optional = false +python-versions = "*" 
+files = [ + {file = "flake8-polyfill-1.0.2.tar.gz", hash = "sha256:e44b087597f6da52ec6393a709e7108b2905317d0c0b744cdca6208e670d8eda"}, + {file = "flake8_polyfill-1.0.2-py2.py3-none-any.whl", hash = "sha256:12be6a34ee3ab795b19ca73505e7b55826d5f6ad7230d31b18e106400169b9e9"}, +] + +[package.dependencies] +flake8 = "*" + [[package]] name = "frozenlist" version = "1.4.0" @@ -625,6 +704,24 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "isort" +version = "5.12.0" +description = "A Python utility / library to sort Python imports." +category = "dev" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] + +[package.extras] +colors = ["colorama (>=0.4.3)"] +pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements-deprecated-finder = ["pip-api", "pipreqs"] + [[package]] name = "joblib" version = "1.3.2" @@ -693,6 +790,52 @@ files = [ pydantic = ">=1,<3" requests = ">=2,<3" +[[package]] +name = "lazy-object-proxy" +version = "1.9.0" +description = "A fast and thorough lazy object proxy." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash 
= "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"}, + {file = 
"lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, +] + [[package]] name = "marshmallow" version = "3.20.1" @@ -729,6 +872,18 @@ files = [ [package.dependencies] marshmallow = ">=2.0.0" +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + [[package]] name = "multidict" version = "6.0.4" @@ -1026,6 +1181,89 @@ files = [ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] +[[package]] +name = "pep8" +version = "1.7.1" +description = "Python style guide checker" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pep8-1.7.1-py2.py3-none-any.whl", hash = "sha256:b22cfae5db09833bb9bd7c8463b53e1a9c9b39f12e304a8d0bba729c501827ee"}, + {file = "pep8-1.7.1.tar.gz", hash = "sha256:fe249b52e20498e59e0b5c5256aa52ee99fc295b26ec9eaa85776ffdb9fe6374"}, +] + +[[package]] +name = "pep8-naming" +version = "0.10.0" +description = "Check PEP-8 naming conventions, plugin for flake8" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pep8-naming-0.10.0.tar.gz", hash = "sha256:f3b4a5f9dd72b991bf7d8e2a341d2e1aa3a884a769b5aaac4f56825c1763bf3a"}, + {file = "pep8_naming-0.10.0-py2.py3-none-any.whl", hash = "sha256:5d9f1056cb9427ce344e98d1a7f5665710e2f20f748438e308995852cfa24164"}, +] + +[package.dependencies] +flake8-polyfill = ">=1.0.2,<2" + +[[package]] +name = "platformdirs" +version = "3.10.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, + {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + +[[package]] +name = "prospector" +version = "1.10.2" +description = "Prospector is a tool to analyse Python code by aggregating the result of other tools." 
+category = "dev" +optional = false +python-versions = ">=3.7.2,<4.0" +files = [ + {file = "prospector-1.10.2-py3-none-any.whl", hash = "sha256:3bfe103c28bb821cca84926ca31357fbfd32405e4bf8c34ca2e55885684557e4"}, + {file = "prospector-1.10.2.tar.gz", hash = "sha256:cc8f09e79bdd32247edddf05b666940e88ad96338a84f5717b1e8c0678337821"}, +] + +[package.dependencies] +dodgy = ">=0.2.1,<0.3.0" +flake8 = "<6.0.0" +GitPython = ">=3.1.27,<4.0.0" +mccabe = ">=0.7.0,<0.8.0" +packaging = "*" +pep8-naming = ">=0.3.3,<=0.10.0" +pycodestyle = ">=2.9.0" +pydocstyle = ">=2.0.0" +pyflakes = ">=2.2.0,<3" +pylint = ">=2.8.3" +pylint-celery = "0.3" +pylint-django = ">=2.5,<2.6" +pylint-flask = "0.6" +pylint-plugin-utils = ">=0.7,<0.8" +PyYAML = "*" +requirements-detector = ">=1.2.0" +setoptconf-tmp = ">=0.3.1,<0.4.0" +toml = ">=0.10.2,<0.11.0" + +[package.extras] +with-bandit = ["bandit (>=1.5.1)"] +with-everything = ["bandit (>=1.5.1)", "mypy (>=0.600)", "pyright (>=1.1.3)", "pyroma (>=2.4)", "vulture (>=1.5)"] +with-mypy = ["mypy (>=0.600)"] +with-pyright = ["pyright (>=1.1.3)"] +with-pyroma = ["pyroma (>=2.4)"] +with-vulture = ["vulture (>=1.5)"] + [[package]] name = "psycopg2" version = "2.9.7" @@ -1047,6 +1285,18 @@ files = [ {file = "psycopg2-2.9.7.tar.gz", hash = "sha256:f00cc35bd7119f1fed17b85bd1007855194dde2cbd8de01ab8ebb17487440ad8"}, ] +[[package]] +name = "pycodestyle" +version = "2.11.0" +description = "Python style guide checker" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycodestyle-2.11.0-py2.py3-none-any.whl", hash = "sha256:5d1013ba8dc7895b548be5afb05740ca82454fd899971563d2ef625d090326f8"}, + {file = "pycodestyle-2.11.0.tar.gz", hash = "sha256:259bcc17857d8a8b3b4a2327324b79e5f020a13c16074670f9c8c8f872ea76d0"}, +] + [[package]] name = "pydantic" version = "1.10.12" @@ -1100,6 +1350,130 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name = "pydocstyle" +version = "6.3.0" +description = "Python docstring style checker" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019"}, + {file = "pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1"}, +] + +[package.dependencies] +snowballstemmer = ">=2.2.0" + +[package.extras] +toml = ["tomli (>=1.2.3)"] + +[[package]] +name = "pyflakes" +version = "2.5.0" +description = "passive checker of Python programs" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, + {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, +] + +[[package]] +name = "pylint" +version = "2.17.5" +description = "python code static checker" +category = "dev" +optional = false +python-versions = ">=3.7.2" +files = [ + {file = "pylint-2.17.5-py3-none-any.whl", hash = "sha256:73995fb8216d3bed149c8d51bba25b2c52a8251a2c8ac846ec668ce38fab5413"}, + {file = "pylint-2.17.5.tar.gz", hash = "sha256:f7b601cbc06fef7e62a754e2b41294c2aa31f1cb659624b9a85bcba29eaf8252"}, +] + +[package.dependencies] +astroid = ">=2.15.6,<=2.17.0-dev0" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +dill = [ + {version = ">=0.2", markers = "python_version < \"3.11\""}, + 
{version = ">=0.3.6", markers = "python_version >= \"3.11\""}, +] +isort = ">=4.2.5,<6" +mccabe = ">=0.6,<0.8" +platformdirs = ">=2.2.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +tomlkit = ">=0.10.1" + +[package.extras] +spelling = ["pyenchant (>=3.2,<4.0)"] +testutils = ["gitpython (>3)"] + +[[package]] +name = "pylint-celery" +version = "0.3" +description = "pylint-celery is a Pylint plugin to aid Pylint in recognising and understandingerrors caused when using the Celery library" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pylint-celery-0.3.tar.gz", hash = "sha256:41e32094e7408d15c044178ea828dd524beedbdbe6f83f712c5e35bde1de4beb"}, +] + +[package.dependencies] +astroid = ">=1.0" +pylint = ">=1.0" +pylint-plugin-utils = ">=0.2.1" + +[[package]] +name = "pylint-django" +version = "2.5.3" +description = "A Pylint plugin to help Pylint understand the Django web framework" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pylint-django-2.5.3.tar.gz", hash = "sha256:0ac090d106c62fe33782a1d01bda1610b761bb1c9bf5035ced9d5f23a13d8591"}, + {file = "pylint_django-2.5.3-py3-none-any.whl", hash = "sha256:56b12b6adf56d548412445bd35483034394a1a94901c3f8571980a13882299d5"}, +] + +[package.dependencies] +pylint = ">=2.0,<3" +pylint-plugin-utils = ">=0.7" + +[package.extras] +for-tests = ["coverage", "django-tables2", "django-tastypie", "factory-boy", "pylint (>=2.13)", "pytest", "wheel"] +with-django = ["Django"] + +[[package]] +name = "pylint-flask" +version = "0.6" +description = "pylint-flask is a Pylint plugin to aid Pylint in recognizing and understanding errors caused when using Flask" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pylint-flask-0.6.tar.gz", hash = "sha256:f4d97de2216bf7bfce07c9c08b166e978fe9f2725de2a50a9845a97de7e31517"}, +] + +[package.dependencies] +pylint-plugin-utils = ">=0.2.1" + +[[package]] +name = "pylint-plugin-utils" +version = "0.7" +description = "Utilities and helpers for writing Pylint plugins" +category = "dev" +optional = false +python-versions = ">=3.6.2" +files = [ + {file = "pylint-plugin-utils-0.7.tar.gz", hash = "sha256:ce48bc0516ae9415dd5c752c940dfe601b18fe0f48aa249f2386adfa95a004dd"}, + {file = "pylint_plugin_utils-0.7-py3-none-any.whl", hash = "sha256:b3d43e85ab74c4f48bb46ae4ce771e39c3a20f8b3d56982ab17aa73b4f98d535"}, +] + +[package.dependencies] +pylint = ">=1.7" + [[package]] name = "python-dotenv" version = "1.0.0" @@ -1305,6 +1679,51 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requirements-detector" +version = "1.2.2" +description = "Python tool to find and list requirements of a Python project" +category = "dev" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "requirements_detector-1.2.2-py3-none-any.whl", hash = "sha256:d7c60493bf166da3dd59de0e6cb25765e0e32a1931aeae92614034e5786d0bd0"}, + {file = "requirements_detector-1.2.2.tar.gz", hash = "sha256:3642cd7a5b261d79536c36bb7ecacf2adabd902d2e0e42bfb2ba82515da10501"}, +] + +[package.dependencies] +astroid = ">=2.0,<3.0" +packaging = ">=21.3" +semver = ">=3.0.0,<4.0.0" +toml = ">=0.10.2,<0.11.0" + +[[package]] +name = "semver" +version = "3.0.1" +description = "Python helper for Semantic Versioning (https://semver.org)" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "semver-3.0.1-py3-none-any.whl", hash = 
"sha256:2a23844ba1647362c7490fe3995a86e097bb590d16f0f32dfc383008f19e4cdf"}, + {file = "semver-3.0.1.tar.gz", hash = "sha256:9ec78c5447883c67b97f98c3b6212796708191d22e4ad30f4570f840171cbce1"}, +] + +[[package]] +name = "setoptconf-tmp" +version = "0.3.1" +description = "A module for retrieving program settings from various sources in a consistant method." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "setoptconf-tmp-0.3.1.tar.gz", hash = "sha256:e0480addd11347ba52f762f3c4d8afa3e10ad0affbc53e3ffddc0ca5f27d5778"}, + {file = "setoptconf_tmp-0.3.1-py3-none-any.whl", hash = "sha256:76035d5cd1593d38b9056ae12d460eca3aaa34ad05c315b69145e138ba80a745"}, +] + +[package.extras] +yaml = ["pyyaml"] + [[package]] name = "smmap" version = "5.0.0" @@ -1329,6 +1748,18 @@ files = [ {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, ] +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + [[package]] name = "sqlalchemy" version = "2.0.20" @@ -1488,6 +1919,18 @@ requests = ">=2.26.0" [package.extras] blobfile = ["blobfile (>=2)"] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -1500,6 +1943,18 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tomlkit" +version = "0.12.1" +description = "Style preserving TOML library" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, + {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, +] + [[package]] name = "tqdm" version = "4.66.1" @@ -1587,6 +2042,91 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." 
+category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ + {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975"}, + {file = "wrapt-1.15.0-cp310-cp310-win32.whl", hash = "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1"}, + {file = "wrapt-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98"}, + {file = "wrapt-1.15.0-cp311-cp311-win32.whl", hash = "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416"}, + {file = "wrapt-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248"}, + {file = "wrapt-1.15.0-cp35-cp35m-win32.whl", hash = "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559"}, + {file = "wrapt-1.15.0-cp35-cp35m-win_amd64.whl", hash = "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639"}, + {file = "wrapt-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2"}, + {file = "wrapt-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1"}, + {file = "wrapt-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420"}, + {file = "wrapt-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e"}, + {file 
= "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653"}, + {file = "wrapt-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0"}, + {file = "wrapt-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7"}, + {file = "wrapt-1.15.0-cp38-cp38-win32.whl", hash = "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b"}, + {file = "wrapt-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", 
hash = "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9"}, + {file = "wrapt-1.15.0-cp39-cp39-win32.whl", hash = "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff"}, + {file = "wrapt-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6"}, + {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, + {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, +] + [[package]] name = "yarl" version = "1.9.2" @@ -1678,4 +2218,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "87a7b3b9660b3adad9c83f2095febfcb80d6bf6b75c30be1a3f29b1716da9aa2" +content-hash = "0d1a3b7f1022776e878127bb7296ab78ca439dd4b5c46af7bd70e05ba28a9198" diff --git a/module_programming_llm/pyproject.toml b/module_programming_llm/pyproject.toml index 425b47051..e785707cc 100644 --- a/module_programming_llm/pyproject.toml +++ b/module_programming_llm/pyproject.toml @@ -19,6 +19,9 @@ tiktoken = "^0.4.0" [tool.poetry.scripts] module = "athena:run_module" +[tool.poetry.group.dev.dependencies] +prospector = "^1.10.2" + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" From dc9229d82c8f2af3e1d5eaa24d539cde68bd3772 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 21:09:42 +0200 Subject: [PATCH 03/51] add changes --- athena/athena/helpers/programming/code_repository.py | 4 ++-- athena/athena/schemas/programming_exercise.py | 2 +- .../prompts/split_grading_instructions_by_file.py | 2 ++ .../prompts/split_problem_statement_by_file.py | 4 +++- .../split_grading_instructions_by_file.py | 7 +++++++ 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/athena/athena/helpers/programming/code_repository.py b/athena/athena/helpers/programming/code_repository.py index 32c264905..53c6f1595 100644 --- a/athena/athena/helpers/programming/code_repository.py +++ b/athena/athena/helpers/programming/code_repository.py @@ -43,7 +43,7 @@ def get_repository(url: str) -> Repo: repo_zip.extractall(cache_dir_path) if not (cache_dir_path / ".git").exists(): repo = Repo.init(cache_dir_path, initial_branch='main') - repo.index.add(repo.untracked_files) - repo.index.commit("Initial commit") + repo.git.add(all=True, force=True) + repo.git.commit('-m', 'Initial commit') return Repo(cache_dir_path) \ No newline at end of file diff --git a/athena/athena/schemas/programming_exercise.py b/athena/athena/schemas/programming_exercise.py index 0f9d40c44..2ac9610e5 100644 --- a/athena/athena/schemas/programming_exercise.py +++ b/athena/athena/schemas/programming_exercise.py @@ -1,6 +1,6 @@ from pydantic import Field, AnyUrl from zipfile import ZipFile -from git import Repo +from git.repo import Repo from athena.helpers.programming.code_repository import get_repository_zip, get_repository from .exercise_type import ExerciseType diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 
7c0cc685d..833f7ddd6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index a1ee99f9f..6175ec07f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file. +Restructure the problem statement by changed file. """ human_template = """\ @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Problem statement by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index fefa93e14..0982e5c2a 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -5,6 +5,7 @@ from athena.programming import Exercise from athena.storage import store_exercise +from athena.logger import logger from module_programming_llm.config import BasicApproachConfig from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse @@ -44,6 +45,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach file_extension = get_programming_language_file_extension(exercise.programming_language) or "" changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + # logger.info("Exercise: %s", file_extension) + # logger.info("Changed files: %s", changed_files) + # logger.info("Solution repo: %s", solution_repo) + # logger.info("Template repo: %s", template_repo) + # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_grading_instructions_by_file_prompt.system_message, From 3ddb848028aeff091afd22217edb8eda1d60a45a Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 21:30:15 +0200 Subject: [PATCH 04/51] change prompt --- module_programming_llm/module_programming_llm/config.py | 8 ++++---- .../prompts/split_grading_instructions_by_file.py | 9 ++++++--- .../prompts/split_problem_statement_by_file.py | 7 +++++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 173ec1255..0d6cc7e95 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,10 +18,11 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files}**\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** *Note: `changed_files` are the changed files between template and solution repository.*\ """ + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, @@ -30,10 +31,9 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 833f7ddd6..2f181609f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,15 +1,18 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file. +Restructure the grading instructions by changed file to make it simpler. 
""" human_template = """\ Grading instructions: {grading_instructions} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 6175ec07f..c06aea30e 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -8,8 +8,11 @@ Problem statement: {problem_statement} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Problem statement by file: """ \ No newline at end of file From 7cb7c44ce9956f5875fcd83ef1dac5ba9a18f264 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 22:20:06 +0200 Subject: [PATCH 05/51] update split problem statements and grading instructions --- .../module_programming_llm/__main__.py | 8 +- .../split_grading_instructions_by_file.py | 99 ++++++++++--------- .../split_problem_statement_by_file.py | 97 +++++++++--------- 3 files changed, 103 insertions(+), 101 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index e4fb070fe..786d9d824 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -8,18 +8,12 @@ from module_programming_llm.config import Configuration from module_programming_llm.generate_suggestions_by_file import generate_suggestions_by_file -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed @submissions_consumer -def receive_submissions(exercise: Exercise, submissions: List[Submission], module_config: Configuration): +def receive_submissions(exercise: Exercise, submissions: List[Submission]): logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id) - # Split problem statements and grading instructions for later - generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - @submission_selector def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission: diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0982e5c2a..8d45b597f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,16 +3,16 @@ from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import 
store_exercise -from athena.logger import logger +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_GRADING_INSTRUCTIONS_KEY = "file_grading_instructions" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileGradingInstruction(BaseModel): @@ -22,34 +22,50 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") -def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: +async def split_grading_instructions_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitGradingInstructions]: """Split the general grading instructions by file Args: - exercise (Exercise): Exercise to split the grading instructions for + exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) + submission (Submission): Submission to split the grading instructions for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitGradingInstructions: Grading instructions split by file, empty if input was too long + Optional[SplitGradingInstructions]: Split grading instructions, None if it is too short or too long """ - if exercise.grading_instructions is None or exercise.grading_instructions.strip() == "": - return SplitGradingInstructions(instructions=[]) + + # Return None if the grading instructions are too short + if (exercise.grading_instructions is None + or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + return None model = config.model.get_model() - solution_repo = exercise.get_solution_repository() template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - # logger.info("Exercise: %s", file_extension) - # logger.info("Changed files: %s", changed_files) - # logger.info("Solution repo: %s", solution_repo) - # logger.info("Template repo: %s", template_repo) - # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + 
).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -60,15 +76,13 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach prompt_input = { "grading_instructions": exercise.grading_instructions, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitGradingInstructions object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitGradingInstructions(instructions=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_grading_instructions = predict_and_parse( model=model, @@ -78,25 +92,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach ) if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_data", split_grading_instructions.dict()) - - return split_grading_instructions + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_grading_instructions.dict() + }) + if not split_grading_instructions.file_grading_instructions: + return None -def generate_and_store_split_grading_instructions_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: - """Generate and store the split grading instructions if needed - - Args: - exercise (Exercise): Exercise to get the split grading instructions for - config (BasicApproachConfig): Configuration - - Returns: - SplitGradingInstructions: Grading instructions split by file - """ - if FILE_GRADING_INSTRUCTIONS_KEY in exercise.meta: - return SplitGradingInstructions.parse_obj(exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY]) - - split_grading_instructions = split_grading_instructions_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY] = split_grading_instructions.dict() - store_exercise(exercise) return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 8329e5749..1000ad245 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,17 +1,18 @@ -from typing import Sequence +from typing import Optional, Sequence from athena import emit_meta from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import store_exercise +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_PROBLEM_STATEMETS_KEY = "file_problem_statements" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + 
predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileProblemStatement(BaseModel): @@ -21,28 +22,49 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") -def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: +async def split_problem_statement_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitProblemStatement]: """Split the general problem statement by file Args: - exercise (Exercise): Exercise to split the problem statement for + exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) + submission (Submission): Submission to split the problem statement for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitProblemStatement: Problem statement split by file, empty if input was too long + Optional[SplitProblemStatement]: Split problem statement, None if it is too short or too long """ - if exercise.problem_statement.strip() == "": - return SplitProblemStatement(problem_statements=[]) + # Return None if the problem statement is too short + if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: + return None + model = config.model.get_model() - - solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -50,18 +72,16 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon human_message=config.split_problem_statement_by_file_prompt.system_message, pydantic_object=SplitProblemStatement ) - + prompt_input = { "problem_statement": exercise.problem_statement, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitProblemStatement object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitProblemStatement(problem_statements=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_problem_statement = 
predict_and_parse( model=model, @@ -71,25 +91,12 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon ) if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_data", split_problem_statement.dict()) + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_problem_statement.dict() + }) - return split_problem_statement - - -def generate_and_store_split_problem_statement_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: - """Generate and store the split problem statement if needed - - Args: - exercise (Exercise): Exercise to split the problem statement for - config (BasicApproachConfig): Configuration - - Returns: - SplitProblemStatement: Problem statement split by file - """ - if FILE_PROBLEM_STATEMETS_KEY in exercise.meta: - return SplitProblemStatement.parse_obj(exercise.meta[FILE_PROBLEM_STATEMETS_KEY]) + if not split_problem_statement.file_problem_statements: + return None - split_problem_statement = split_problem_statement_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_PROBLEM_STATEMETS_KEY] = split_problem_statement.dict() - store_exercise(exercise) return split_problem_statement From cf76e06f521ae3bc771a3aac6967f606422ed67c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 23:46:04 +0200 Subject: [PATCH 06/51] refactor generator --- .../module_programming_llm/config.py | 5 +- .../generate_suggestions_by_file.py | 246 +++++++++++------- .../helpers/llm_utils.py | 21 +- .../module_programming_llm/helpers/utils.py | 2 +- .../prompts/generate_suggestions_by_file.py | 4 +- .../split_grading_instructions_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 7 files changed, 157 insertions(+), 125 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 0d6cc7e95..3507eded0 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -43,9 +43,9 @@ class SplitGradingInstructionsByFilePrompt(BaseModel): class GenerationPrompt(BaseModel): """\ Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, \ -**{submission}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}** +**{submission_file}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{template_to_solution_diff}** -*Note: Prompt will be applied per file independently, submission is a single file.*\ +*Note: Prompt will be applied per file independently. Also, you don't have to include all features, e.g. 
template_to_solution_diff.*\ """ system_message: str = Field(default=generate_suggestions_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -58,6 +58,7 @@ class BasicApproachConfig(BaseModel): max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_number_of_files: int = Field(default=25, description="Maximum number of files.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index bc93269eb..ad8daf47e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,17 +1,25 @@ from typing import List, Optional, Sequence - +import asyncio from pydantic import BaseModel, Field -from langchain.chains.openai_functions import create_structured_output_chain from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from athena.logger import logger from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed -from module_programming_llm.helpers.llm_utils import check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers +from module_programming_llm.split_grading_instructions_by_file import split_grading_instructions_by_file +from module_programming_llm.split_problem_statement_by_file import split_problem_statement_by_file +from module_programming_llm.helpers.llm_utils import ( + check_prompt_length_and_omit_features_if_necessary, + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + predict_and_parse, +) +from module_programming_llm.helpers.utils import( + get_diff, + load_files_from_repo, + add_line_numbers, + get_programming_language_file_extension +) class FeedbackModel(BaseModel): @@ -38,13 +46,26 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() - # Get split grading instructions - split_grading_instructions = generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=config, debug=debug) - file_grading_instructions = { item.file_name: item.grading_instructions for item in split_grading_instructions.instructions } + # Get split problem statement and grading instructions by file (if necessary) + split_problem_statement, split_grading_instructions = await asyncio.gather( + split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), + 
split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + ) + + is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split + file_problem_statements = { + item.file_name: item.problem_statement + for item in split_problem_statement.file_problem_statements + } if split_problem_statement is not None else {} - # Get split problem statement - split_problem_statement = generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=config, debug=debug) - file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.problem_statements } + is_short_grading_instructions = ( + num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split + if exercise.grading_instructions is not None else True + ) + file_grading_instructions = { + item.file_name: item.grading_instructions + for item in split_grading_instructions.file_grading_instructions + } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] @@ -53,33 +74,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio template_repo = exercise.get_template_repository() submission_repo = submission.get_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) - if file_extension is None: - raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - - files = load_files_from_repo( + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") + + # Changed text files + changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x.endswith(file_extension) if file_extension else False + file_filter=lambda x: x in changed_files_from_template_to_submission ) - for file_path, content in files.items(): - if content is None: - continue - - problem_statement = file_problem_statements.get(file_path, "No relevant problem statement section found.") - grading_instructions = file_grading_instructions.get(file_path, "No relevant grading instructions found.") - - content = add_line_numbers(content) - solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) - template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) + for file_path, file_content in changed_files.items(): + problem_statement = ( + exercise.problem_statement if is_short_problem_statement + else file_problem_statements.get(file_path, "No relevant problem statement section found.") + ) + problem_statement = problem_statement if problem_statement.strip() else "No problem statement found." + + grading_instructions = ( + exercise.grading_instructions or "" if is_short_grading_instructions + else file_grading_instructions.get(file_path, "No relevant grading instructions found.") + ) + grading_instructions = grading_instructions if grading_instructions.strip() else "No grading instructions found." 
+ + file_content = add_line_numbers(file_content) + solution_to_submission_diff = get_diff( + src_repo=solution_repo, + dst_repo=submission_repo, + src_prefix="solution", + dst_prefix="submission", + file_path=file_path + ) + template_to_submission_diff = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + src_prefix="template", + dst_prefix="submission", + file_path=file_path + ) + template_to_solution_diff = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + src_prefix="template", + dst_prefix="solution", + file_path=file_path + ) prompt_inputs.append({ "file_path": file_path, - "submission": content, + "priority": len(template_to_solution_diff), + "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, "solution_to_submission_diff": solution_to_submission_diff, "template_to_submission_diff": template_to_submission_diff, + "template_to_solution_diff": template_to_solution_diff, "grading_instructions": grading_instructions, "problem_statement": problem_statement, }) @@ -93,11 +145,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio # Filter long prompts (omitting features if necessary) omittable_features = [ + "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", + "solution_to_submission_diff", "template_to_submission_diff", - "solution_to_submission_diff" ] + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it + prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in [check_prompt_length_and_omit_features_if_necessary( @@ -110,70 +166,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio if should_run ] - chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) - if not prompt_inputs: - return [] - result = await chain.agenerate(prompt_inputs) - - logger.info("Generated result: %s ", result) - - return [] - # return predict_and_parse( - # model=model, - # chat_prompt=chat_prompt, - # prompt_input={ - # "grading_instructions": exercise.grading_instructions, - # "changed_files": changed_files - # }, - # pydantic_object=SplitGradingInstructions - # ) - - - - - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# 
id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file + # If we have many files we need to filter and prioritize them + if len(prompt_inputs) > config.max_number_of_files: + programming_language_extension = get_programming_language_file_extension(programming_language=exercise.programming_language) + + # Prioritize files that have a diff between solution and submission + prompt_inputs = sorted( + prompt_inputs, + key=lambda x: x["priority"], + reverse=True + ) + + filtered_prompt_inputs = [] + if programming_language_extension is not None: + filtered_prompt_inputs = [ + prompt_input + for prompt_input in prompt_inputs + if prompt_input["file_path"].endswith(programming_language_extension) + ] + + while len(filtered_prompt_inputs) < config.max_number_of_files and prompt_inputs: + filtered_prompt_inputs.append(prompt_inputs.pop(0)) + prompt_inputs = filtered_prompt_inputs + + results: List[AssessmentModel] = await asyncio.gather(*[ + predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=AssessmentModel + ) for prompt_input in prompt_inputs + ]) + + if debug: + emit_meta( + "generate_suggestions", [ + { + "file_path": prompt_input["file_path"], + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() + } + for prompt_input, result in zip(prompt_inputs, results) + ] + ) + + feedbacks: List[Feedback] = [] + for prompt_input, result in zip(prompt_inputs, results): + file_path = prompt_input["file_path"] + for feedback in result.feedbacks: + feedbacks.append(Feedback( + exercise_id=exercise.id, + submission_id=submission.id, + title=feedback.title, + description=feedback.description, + file_path=file_path, + line_start=feedback.line_start, + line_end=feedback.line_end, + credits=feedback.credits, + meta={} + )) + + return feedbacks diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 53a300f00..b59ca9dab 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): """Predicts and parses the output of the model Args: @@ -131,22 +131,3 @@ def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, chain = LLMChain(llm=model, prompt=chat_prompt) output = chain.run(**prompt_input) return output_parser.parse(output) - - -async def agenerate_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): - """Generates and parses the output of the model - - Args: - model (BaseLanguageModel): The model to generate with - chat_prompt (ChatPromptTemplate): Prompt to use - prompt_input (dict): Input parameters to use for the prompt - pydantic_object (Type[T]): Pydantic model to parse the output - """ - if supports_function_calling(model): - 
chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/utils.py b/module_programming_llm/module_programming_llm/helpers/utils.py index 8f6c67ab8..894f4a6a6 100644 --- a/module_programming_llm/module_programming_llm/helpers/utils.py +++ b/module_programming_llm/module_programming_llm/helpers/utils.py @@ -9,7 +9,7 @@ from langchain.document_loaders import GitLoader -def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, Optional[str]]: +def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, str]: return { doc.metadata['file_path']: doc.page_content for doc in GitLoader(repo_path=str(repo.working_tree_dir), file_filter=file_filter).load() diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index f1a300ae0..2e6b27059 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -16,8 +16,8 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission to grade (with line numbers : ): -{submission} +Student\'s submission file to grade (with line numbers : ): +{submission_file} Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 8d45b597f..cb1f4a29f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -84,7 +84,7 @@ async def split_grading_instructions_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_grading_instructions = predict_and_parse( + split_grading_instructions = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1000ad245..28df233bf 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -83,7 +83,7 @@ async def split_problem_statement_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_problem_statement = predict_and_parse( + split_problem_statement = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, From 6ac924f45b2af46e53849dde48e54dfd76ff8fed Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 23:46:16 +0200 Subject: [PATCH 07/51] fix spacing --- .../module_programming_llm/generate_suggestions_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index ad8daf47e..22240d708 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -14,7 +14,7 @@ num_tokens_from_string, predict_and_parse, ) -from module_programming_llm.helpers.utils import( +from module_programming_llm.helpers.utils import ( get_diff, load_files_from_repo, add_line_numbers, From 54a9823e2c5941f66f9054c6ba153022ee13d2d8 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 00:06:29 +0200 Subject: [PATCH 08/51] fix stuff --- .../module_programming_llm/generate_suggestions_by_file.py | 7 ++++++- .../prompts/generate_suggestions_by_file.py | 3 --- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 22240d708..93dc67564 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,4 +1,5 @@ from typing import List, Optional, Sequence +import os import asyncio from pydantic import BaseModel, Field @@ -80,11 +81,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio file_path=None, name_only=True ).split("\n") + changed_files_from_template_to_submission = [ + os.path.join(str(submission_repo.working_tree_dir or ""), file_path) + for file_path in changed_files_from_template_to_submission + ] # Changed text files changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x in changed_files_from_template_to_submission + file_filter=lambda file_path: file_path in changed_files_from_template_to_submission ) for file_path, file_content in changed_files.items(): diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 2e6b27059..344d635d0 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -9,9 +9,6 @@ Problem statement: {problem_statement} -Example solution: -{example_solution} - Grading instructions: {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} From f53feb98b9450afebe3bbc6843a8794d951f84c8 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:41:39 +0200 Subject: [PATCH 09/51] add fixes --- module_programming_llm/module_programming_llm/config.py | 7 +++---- .../prompts/split_problem_statement_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3507eded0..3d9225a1f 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -22,23 +22,22 @@ class SplitProblemStatementsByFilePrompt(BaseModel): *Note: `changed_files` are the changed files between template and solution repository.*\ """ - tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, description="Message from a human. The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") + class SplitGradingInstructionsByFilePrompt(BaseModel): """\ Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ - tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, description="Message from a human. 
The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") class GenerationPrompt(BaseModel): """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index c06aea30e..b92710c07 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Problem statement by file: diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 28df233bf..1cb35f7c8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -69,7 +69,7 @@ async def split_problem_statement_by_file( chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_problem_statement_by_file_prompt.system_message, - human_message=config.split_problem_statement_by_file_prompt.system_message, + human_message=config.split_problem_statement_by_file_prompt.human_message, pydantic_object=SplitProblemStatement ) From 480f5256b5ecf6a6db1e1fc44e24a88af3869009 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:51:13 +0200 Subject: [PATCH 10/51] more fixes --- .../generate_suggestions_by_file.py | 19 ++++++++++--------- .../split_grading_instructions_by_file.py | 4 ++-- .../split_problem_statement_by_file.py | 2 +- .../split_grading_instructions_by_file.py | 12 ++++++++++-- .../split_problem_statement_by_file.py | 8 ++++++++ 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 93dc67564..732ab557b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -47,10 +47,18 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_by_file_prompt.system_message, + human_message=config.generate_suggestions_by_file_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( - split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), - split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), + split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) ) is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split @@ -141,13 +149,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio "problem_statement": problem_statement, }) - chat_prompt = get_chat_prompt_with_formatting_instructions( - model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, - pydantic_object=AssessmentModel - ) - # Filter long prompts (omitting features if necessary) omittable_features = [ "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 2f181609f..54f2872f6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file to make it simpler. +Restructure the grading instructions by student changed file to make it simpler. 
""" human_template = """\ @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Grading instructions by file: diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index b92710c07..397e34893 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the problem statement by changed file. +Restructure the problem statement by student changed file to make it simpler. """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index cb1f4a29f..97903d809 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitGradingInstructions(BaseModel): async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitGradingInstructions]: @@ -36,6 +39,7 @@ async def split_grading_instructions_by_file( Args: exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) submission (Submission): Submission to split the grading instructions for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for grading_instructions config (BasicApproachConfig): Configuration Returns: @@ -44,7 +48,11 @@ async def split_grading_instructions_by_file( # Return None if the grading instructions are too short if (exercise.grading_instructions is None - or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + or num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split): + return None + + # Return None if the grading instructions are not in the prompt + if "grading_instructions" not in prompt.input_variables: return None model = config.model.get_model() @@ -92,7 +100,7 @@ async def split_grading_instructions_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), "result": split_grading_instructions.dict() }) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1cb35f7c8..4790f8ce8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ 
b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitProblemStatement(BaseModel): async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitProblemStatement]: @@ -36,6 +39,7 @@ async def split_problem_statement_by_file( Args: exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) submission (Submission): Submission to split the problem statement for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for problem_statement config (BasicApproachConfig): Configuration Returns: @@ -45,6 +49,10 @@ async def split_problem_statement_by_file( # Return None if the problem statement is too short if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: return None + + # Return None if the problem statement not in the prompt + if "problem_statement" not in prompt.input_variables: + return None model = config.model.get_model() From c94d4566de8b058f90c13fdb349dad8bb32de3ca Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 09:51:29 +0200 Subject: [PATCH 11/51] remove empty line --- .../module_programming_llm/generate_suggestions_by_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 732ab557b..c3cd1201e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -54,7 +54,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio pydantic_object=AssessmentModel ) - # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), From 62b6929224ce09dc9ffa513a8d293d0b13adcfbb Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:53:30 +0200 Subject: [PATCH 12/51] delete unuse --- .../basic/basic_feedback_provider.py | 110 ------------------ 1 file changed, 110 deletions(-) delete mode 100644 module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py diff --git a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py deleted file mode 100644 index 134fd0e41..000000000 --- a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py +++ /dev/null @@ -1,110 +0,0 @@ -# import json -# from typing import List - -# from langchain.chains import LLMChain -# from langchain.prompts import ( -# ChatPromptTemplate, -# SystemMessagePromptTemplate, -# HumanMessagePromptTemplate, -# ) - -# from athena.programming import Exercise, Submission, Feedback -# from athena.logger import logger - -# from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers -# from module_programming_llm.helpers.models import chat - -# from ..prompts.basic_feedback_provider import system_template, human_template - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: -# max_prompt_length = 2560 -# input_list: List[dict] = [] - -# if exercise.meta['file_grading_instructions'] is None: -# raise ValueError("No file grading instructions found for exercise in meta.") -# if exercise.meta['file_problem_statements'] is None: -# raise ValueError("No file problem statements found for exercise in meta.") - -# # Feature extraction -# solution_repo = exercise.get_solution_repository() -# template_repo = exercise.get_template_repository() -# submission_repo = submission.get_repository() - -# file_extension = get_programming_language_file_extension(exercise.programming_language) -# if file_extension is None: -# raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - -# for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): -# if submission_content is None: -# continue - -# problem_statement = exercise.meta['file_problem_statements'].get(file_path) -# if problem_statement is None: -# logger.info("No problem statement for %s, skipping.", file_path) -# continue - -# grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) -# if grading_instructions is None: -# logger.info("No grading instructions for %s, skipping.", file_path) -# continue - -# submission_content = add_line_numbers(submission_content) -# solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) -# template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) - -# input_list.append({ -# "file_path": file_path, -# "submission_content": submission_content, -# "solution_to_submission_diff": solution_to_submission_diff, -# "template_to_submission_diff": template_to_submission_diff, -# "grading_instructions": grading_instructions, -# "problem_statement": problem_statement, -# }) - -# system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) -# human_message_prompt = 
HumanMessagePromptTemplate.from_template(human_template) -# chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file From 4533d99eaaca22a5593bf7a63e57fa3e41034c2c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:09:08 +0200 Subject: [PATCH 13/51] add small changes --- .../module_programming_llm/__main__.py | 1 + .../module_programming_llm/config.py | 13 +++++++------ .../generate_suggestions_by_file.py | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index 786d9d824..556313bf6 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -33,5 +33,6 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, module_co if __name__ == "__main__": + # Preload for token estimation later tiktoken.get_encoding("cl100k_base") app.start() diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3d9225a1f..743ddf693 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,9 +18,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -31,7 +29,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** +Features available: 
**{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") @@ -53,11 +51,14 @@ class GenerationPrompt(BaseModel): class BasicApproachConfig(BaseModel): - """This approach uses a LLM with a single prompt to generate feedback in a single step.""" + """\ +This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ +Then, it generates suggestions for each file independently.\ +""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - max_number_of_files: int = Field(default=25, description="Maximum number of files.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index c3cd1201e..179881f1b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -136,8 +136,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) prompt_inputs.append({ - "file_path": file_path, - "priority": len(template_to_solution_diff), + "file_path": file_path, # Not really relevant for the prompt + "priority": len(template_to_solution_diff), # Not really relevant for the prompt "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, @@ -149,15 +149,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio }) # Filter long prompts (omitting features if necessary) + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it omittable_features = [ - "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) + "template_to_solution_diff", # If it is even included in the prompt (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", "solution_to_submission_diff", - "template_to_submission_diff", + "template_to_submission_diff", # In the future we might indicate the changed lines in the submission_file additionally ] - # "submission_file" is not omittable, because it is the main input containing the line numbers - # In the future we might be able to include the line numbers in the diff, but for now we need to keep it prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in From 6bf2459eebea6deda3c17069384cfe2c150b9012 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:10:47 +0200 Subject: [PATCH 14/51] typo --- .../module_programming_llm/split_problem_statement_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 4790f8ce8..f2c1f0f1f 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -99,7 +99,7 @@ async def split_problem_statement_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), "result": split_problem_statement.dict() }) From 3b9d0d17e0b73bb41c3a30de899f01646cee8895 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:30:33 +0200 Subject: [PATCH 15/51] add more fixes --- .../split_grading_instructions_by_file.py | 5 ++--- .../split_problem_statement_by_file.py | 5 ++--- .../split_grading_instructions_by_file.py | 20 +++++++++++++++++-- .../split_problem_statement_by_file.py | 20 +++++++++++++++++-- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 54f2872f6..21b754846 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the grading instructions by student changed file to make it simpler. +Your task is to restructure the grading instructions by student changed file to show a tutor \ +relevant instructions for each file. This should make it easier for the tutor to grade the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 397e34893..95dde8787 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the problem statement by student changed file to make it simpler. +Your task is to restructure the problem statement by student changed file to show the student \ +relevant information for each file. 
This should make it easier for the student to solve the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 97903d809..a565b7e0c 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -108,4 +108,20 @@ async def split_grading_instructions_by_file( if not split_grading_instructions.file_grading_instructions: return None + # Join duplicate file names (some responses contain multiple grading instructions for the same file) + file_grading_instructions_by_file_name = defaultdict(list) + for file_grading_instruction in split_grading_instructions.file_grading_instructions: + file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) + + split_grading_instructions.file_grading_instructions = [ + FileGradingInstruction( + file_name=file_name, + grading_instructions="\n".join( + file_grading_instruction.grading_instructions + for file_grading_instruction in file_grading_instructions + ) + ) + for file_name, file_grading_instructions in file_grading_instructions_by_file_name.items() + ] + return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index f2c1f0f1f..ccfc3533a 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -107,4 +107,20 @@ async def split_problem_statement_by_file( if not split_problem_statement.file_problem_statements: return None + # Join duplicate file names (some responses contain multiple problem statements for the same file) + file_problem_statements_by_file_name = defaultdict(list) + for file_problem_statement in split_problem_statement.file_problem_statements: + file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) + + split_problem_statement.file_problem_statements = [ + FileProblemStatement( + file_name=file_name, + problem_statement="\n".join( + file_problem_statement.problem_statement + for file_problem_statement in file_problem_statements + ) + ) + for file_name, file_problem_statements in file_problem_statements_by_file_name.items() + ] + return split_problem_statement From f14f00c1e62ddc9c1874d8afffa6a5e38eac4b07 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:36:18 +0200 Subject: [PATCH 16/51] fix pydantic --- .../module_programming_llm/split_grading_instructions_by_file.py | 1 + .../module_programming_llm/split_problem_statement_by_file.py | 1 + 2 files changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index a565b7e0c..0b1a2a615 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -27,6 +27,7 @@ class SplitGradingInstructions(BaseModel): file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") +# pylint: disable=too-many-locals async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index ccfc3533a..cc72cd3f6 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -27,6 +27,7 @@ class SplitProblemStatement(BaseModel): file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") +# pylint: disable=too-many-locals async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, From 814eb2ba00266a8f5ee9630a8e0408df8595cca0 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 11:56:38 +0200 Subject: [PATCH 17/51] small improvements --- .../generate_suggestions_by_file.py | 4 ++-- .../split_grading_instructions_by_file.py | 8 ++++---- .../split_problem_statement_by_file.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 179881f1b..e4d533b21 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -63,7 +63,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split file_problem_statements = { item.file_name: item.problem_statement - for item in split_problem_statement.file_problem_statements + for item in split_problem_statement.items } if split_problem_statement is not None else {} is_short_grading_instructions = ( @@ -72,7 +72,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) file_grading_instructions = { item.file_name: item.grading_instructions - for item in split_grading_instructions.file_grading_instructions + for item in split_grading_instructions.items } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0b1a2a615..388d7a865 100644 --- 
a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -24,7 +24,7 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + items: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") # pylint: disable=too-many-locals @@ -106,15 +106,15 @@ async def split_grading_instructions_by_file( "result": split_grading_instructions.dict() }) - if not split_grading_instructions.file_grading_instructions: + if not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) file_grading_instructions_by_file_name = defaultdict(list) - for file_grading_instruction in split_grading_instructions.file_grading_instructions: + for file_grading_instruction in split_grading_instructions.items: file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) - split_grading_instructions.file_grading_instructions = [ + split_grading_instructions.items = [ FileGradingInstruction( file_name=file_name, grading_instructions="\n".join( diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index cc72cd3f6..d423a8a48 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -24,7 +24,7 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + items: Sequence[FileProblemStatement] = Field(..., description="File problem statements") # pylint: disable=too-many-locals @@ -105,15 +105,15 @@ async def split_problem_statement_by_file( "result": split_problem_statement.dict() }) - if not split_problem_statement.file_problem_statements: + if not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) file_problem_statements_by_file_name = defaultdict(list) - for file_problem_statement in split_problem_statement.file_problem_statements: + for file_problem_statement in split_problem_statement.items: file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) - split_problem_statement.file_problem_statements = [ + split_problem_statement.items = [ FileProblemStatement( file_name=file_name, problem_statement="\n".join( From 80658eb4c65b430908b6692782fb6bdd9bf70ad9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 12:45:42 +0200 Subject: [PATCH 18/51] add final fixes --- .../generate_suggestions_by_file.py | 4 +-- .../helpers/llm_utils.py | 33 ++++++++++++------- .../prompts/generate_suggestions_by_file.py | 16 ++++----- .../split_grading_instructions_by_file.py | 4 +-- .../split_problem_statement_by_file.py | 4 +-- 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index e4d533b21..152fb6660 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -194,7 +194,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio filtered_prompt_inputs.append(prompt_inputs.pop(0)) prompt_inputs = filtered_prompt_inputs - results: List[AssessmentModel] = await asyncio.gather(*[ + results: List[Optional[AssessmentModel]] = await asyncio.gather(*[ predict_and_parse( model=model, chat_prompt=chat_prompt, @@ -209,7 +209,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio { "file_path": prompt_input["file_path"], "prompt": chat_prompt.format(**prompt_input), - "result": result.dict() + "result": result.dict() if result is not None else None } for prompt_input, result in zip(prompt_inputs, results) ] diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index b59ca9dab..394bdd2f7 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -1,6 +1,5 @@ -from typing import Type, TypeVar, List -from pydantic import BaseModel - +from typing import Optional, Type, TypeVar, List +from pydantic import BaseModel, ValidationError import tiktoken from langchain.chains import LLMChain @@ -11,8 +10,9 @@ SystemMessagePromptTemplate, HumanMessagePromptTemplate, ) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.chains.openai_functions import create_structured_output_chain +from langchain.output_parsers import PydanticOutputParser +from langchain.schema import OutputParserException from athena import emit_meta @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: """Predicts and parses the output of the model Args: @@ -122,12 +122,23 @@ async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTem chat_prompt (ChatPromptTemplate): Prompt to use prompt_input (dict): Input parameters to use for the prompt pydantic_object (Type[T]): Pydantic model to parse the output + + Returns: + Optional[T]: Parsed output, or None if it could not be parsed """ if supports_function_calling(model): chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - 
chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) + + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser) + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 344d635d0..7535fd244 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -1,11 +1,6 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -VERY IMPORTANT: Effective feedback for text assignments should be: -1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual\ -""" - -human_template = """\ Problem statement: {problem_statement} @@ -13,12 +8,17 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission file to grade (with line numbers : ): -{submission_file} - Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} +VERY IMPORTANT: Effective feedback for text assignments should be: +1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. 
Contextual\ +""" + +human_template = """\ Diff between template (deletions) and student\'s submission (additions): {template_to_submission_diff} + +Student\'s submission file to grade (with line numbers : ): +{submission_file} """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 388d7a865..4ba10cb54 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -103,10 +103,10 @@ async def split_grading_instructions_by_file( if debug: emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), - "result": split_grading_instructions.dict() + "result": split_grading_instructions.dict() if split_grading_instructions is not None else None }) - if not split_grading_instructions.items: + if split_grading_instructions is None or not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index d423a8a48..4a4761610 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -102,10 +102,10 @@ async def split_problem_statement_by_file( if debug: emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), - "result": split_problem_statement.dict() + "result": split_problem_statement.dict() if split_problem_statement is not None else None }) - if not split_problem_statement.items: + if split_problem_statement is None or not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) From 0da1404fcb4fa0bd94faba2ce68972b205937d4e Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 14:00:44 +0200 Subject: [PATCH 19/51] fix missing env variables --- env_example/module_text_llm.env | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/env_example/module_text_llm.env b/env_example/module_text_llm.env index d1772bb30..c62a54bf4 100644 --- a/env_example/module_text_llm.env +++ b/env_example/module_text_llm.env @@ -17,7 +17,18 @@ LLM_DEFAULT_MODEL="azure_openai_gpt-35" LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # Azure OpenAI [leave blank if not used] -# Model names prefixed with `azure_openai_` followed by the azure deployment id, e.g. `azure_openai_gpt-35` +# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. 
`azure_openai_gpt-35` LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed -LLM_AZURE_OPENAI_API_VERSION="2023-05-15" # change base if needed +LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed + +# Replicate [leave blank if not used] +# See https://replicate.com and adjust model config options in `module_text_llm/helpers/models/replicate.py` +REPLICATE_API_TOKEN= + +# LangSmith (can be used for tracing LLMs) [leave blank if not used] +# See https://docs.smith.langchain.com +# LANGCHAIN_TRACING_V2=true +# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +# LANGCHAIN_API_KEY="XXX" +# LANGCHAIN_PROJECT="XXX" \ No newline at end of file From 2bad6c019a03a4853240a358e6b1f53436973c42 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 21:09:42 +0200 Subject: [PATCH 20/51] add changes --- athena/athena/helpers/programming/code_repository.py | 4 ++-- athena/athena/schemas/programming_exercise.py | 2 +- .../prompts/split_grading_instructions_by_file.py | 2 ++ .../prompts/split_problem_statement_by_file.py | 4 +++- .../split_grading_instructions_by_file.py | 7 +++++++ 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/athena/athena/helpers/programming/code_repository.py b/athena/athena/helpers/programming/code_repository.py index 32c264905..53c6f1595 100644 --- a/athena/athena/helpers/programming/code_repository.py +++ b/athena/athena/helpers/programming/code_repository.py @@ -43,7 +43,7 @@ def get_repository(url: str) -> Repo: repo_zip.extractall(cache_dir_path) if not (cache_dir_path / ".git").exists(): repo = Repo.init(cache_dir_path, initial_branch='main') - repo.index.add(repo.untracked_files) - repo.index.commit("Initial commit") + repo.git.add(all=True, force=True) + repo.git.commit('-m', 'Initial commit') return Repo(cache_dir_path) \ No newline at end of file diff --git a/athena/athena/schemas/programming_exercise.py b/athena/athena/schemas/programming_exercise.py index 0f9d40c44..2ac9610e5 100644 --- a/athena/athena/schemas/programming_exercise.py +++ b/athena/athena/schemas/programming_exercise.py @@ -1,6 +1,6 @@ from pydantic import Field, AnyUrl from zipfile import ZipFile -from git import Repo +from git.repo import Repo from athena.helpers.programming.code_repository import get_repository_zip, get_repository from .exercise_type import ExerciseType diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 7c0cc685d..833f7ddd6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index a1ee99f9f..6175ec07f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming 
assignments. -Restructure the grading instructions by changed file. +Restructure the problem statement by changed file. """ human_template = """\ @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Problem statement by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index fefa93e14..0982e5c2a 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -5,6 +5,7 @@ from athena.programming import Exercise from athena.storage import store_exercise +from athena.logger import logger from module_programming_llm.config import BasicApproachConfig from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse @@ -44,6 +45,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach file_extension = get_programming_language_file_extension(exercise.programming_language) or "" changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + # logger.info("Exercise: %s", file_extension) + # logger.info("Changed files: %s", changed_files) + # logger.info("Solution repo: %s", solution_repo) + # logger.info("Template repo: %s", template_repo) + # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_grading_instructions_by_file_prompt.system_message, From 46a3cc3a07b19674c1b8cc5237d7a598df06a627 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 21:30:15 +0200 Subject: [PATCH 21/51] change prompt --- module_programming_llm/module_programming_llm/config.py | 8 ++++---- .../prompts/split_grading_instructions_by_file.py | 9 ++++++--- .../prompts/split_problem_statement_by_file.py | 7 +++++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 173ec1255..0d6cc7e95 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,10 +18,11 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files}**\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** *Note: `changed_files` are the changed files between template and solution repository.*\ """ + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, @@ -30,10 +31,9 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 833f7ddd6..2f181609f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,15 +1,18 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file. +Restructure the grading instructions by changed file to make it simpler. 
""" human_template = """\ Grading instructions: {grading_instructions} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 6175ec07f..c06aea30e 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -8,8 +8,11 @@ Problem statement: {problem_statement} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Problem statement by file: """ \ No newline at end of file From 9be4514bd63c715be5fa2960001efffcef751aef Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 22:20:06 +0200 Subject: [PATCH 22/51] update split problem statements and grading instructions --- .../module_programming_llm/__main__.py | 8 +- .../split_grading_instructions_by_file.py | 99 ++++++++++--------- .../split_problem_statement_by_file.py | 97 +++++++++--------- 3 files changed, 103 insertions(+), 101 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index e4fb070fe..786d9d824 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -8,18 +8,12 @@ from module_programming_llm.config import Configuration from module_programming_llm.generate_suggestions_by_file import generate_suggestions_by_file -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed @submissions_consumer -def receive_submissions(exercise: Exercise, submissions: List[Submission], module_config: Configuration): +def receive_submissions(exercise: Exercise, submissions: List[Submission]): logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id) - # Split problem statements and grading instructions for later - generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - @submission_selector def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission: diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0982e5c2a..8d45b597f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,16 +3,16 @@ from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import 
store_exercise -from athena.logger import logger +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_GRADING_INSTRUCTIONS_KEY = "file_grading_instructions" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileGradingInstruction(BaseModel): @@ -22,34 +22,50 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") -def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: +async def split_grading_instructions_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitGradingInstructions]: """Split the general grading instructions by file Args: - exercise (Exercise): Exercise to split the grading instructions for + exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) + submission (Submission): Submission to split the grading instructions for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitGradingInstructions: Grading instructions split by file, empty if input was too long + Optional[SplitGradingInstructions]: Split grading instructions, None if it is too short or too long """ - if exercise.grading_instructions is None or exercise.grading_instructions.strip() == "": - return SplitGradingInstructions(instructions=[]) + + # Return None if the grading instructions are too short + if (exercise.grading_instructions is None + or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + return None model = config.model.get_model() - solution_repo = exercise.get_solution_repository() template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - # logger.info("Exercise: %s", file_extension) - # logger.info("Changed files: %s", changed_files) - # logger.info("Solution repo: %s", solution_repo) - # logger.info("Template repo: %s", template_repo) - # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + 
).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -60,15 +76,13 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach prompt_input = { "grading_instructions": exercise.grading_instructions, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitGradingInstructions object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitGradingInstructions(instructions=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_grading_instructions = predict_and_parse( model=model, @@ -78,25 +92,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach ) if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_data", split_grading_instructions.dict()) - - return split_grading_instructions + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_grading_instructions.dict() + }) + if not split_grading_instructions.file_grading_instructions: + return None -def generate_and_store_split_grading_instructions_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: - """Generate and store the split grading instructions if needed - - Args: - exercise (Exercise): Exercise to get the split grading instructions for - config (BasicApproachConfig): Configuration - - Returns: - SplitGradingInstructions: Grading instructions split by file - """ - if FILE_GRADING_INSTRUCTIONS_KEY in exercise.meta: - return SplitGradingInstructions.parse_obj(exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY]) - - split_grading_instructions = split_grading_instructions_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY] = split_grading_instructions.dict() - store_exercise(exercise) return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 8329e5749..1000ad245 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,17 +1,18 @@ -from typing import Sequence +from typing import Optional, Sequence from athena import emit_meta from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import store_exercise +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_PROBLEM_STATEMETS_KEY = "file_problem_statements" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + 
predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileProblemStatement(BaseModel): @@ -21,28 +22,49 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") -def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: +async def split_problem_statement_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitProblemStatement]: """Split the general problem statement by file Args: - exercise (Exercise): Exercise to split the problem statement for + exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) + submission (Submission): Submission to split the problem statement for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitProblemStatement: Problem statement split by file, empty if input was too long + Optional[SplitProblemStatement]: Split problem statement, None if it is too short or too long """ - if exercise.problem_statement.strip() == "": - return SplitProblemStatement(problem_statements=[]) + # Return None if the problem statement is too short + if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: + return None + model = config.model.get_model() - - solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -50,18 +72,16 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon human_message=config.split_problem_statement_by_file_prompt.system_message, pydantic_object=SplitProblemStatement ) - + prompt_input = { "problem_statement": exercise.problem_statement, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitProblemStatement object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitProblemStatement(problem_statements=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_problem_statement = 
predict_and_parse( model=model, @@ -71,25 +91,12 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon ) if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_data", split_problem_statement.dict()) + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_problem_statement.dict() + }) - return split_problem_statement - - -def generate_and_store_split_problem_statement_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: - """Generate and store the split problem statement if needed - - Args: - exercise (Exercise): Exercise to split the problem statement for - config (BasicApproachConfig): Configuration - - Returns: - SplitProblemStatement: Problem statement split by file - """ - if FILE_PROBLEM_STATEMETS_KEY in exercise.meta: - return SplitProblemStatement.parse_obj(exercise.meta[FILE_PROBLEM_STATEMETS_KEY]) + if not split_problem_statement.file_problem_statements: + return None - split_problem_statement = split_problem_statement_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_PROBLEM_STATEMETS_KEY] = split_problem_statement.dict() - store_exercise(exercise) return split_problem_statement From 9b00f2dca0a2db66bae462ce2456b0d6da274339 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 23:46:04 +0200 Subject: [PATCH 23/51] refactor generator --- .../module_programming_llm/config.py | 5 +- .../generate_suggestions_by_file.py | 246 +++++++++++------- .../helpers/llm_utils.py | 21 +- .../module_programming_llm/helpers/utils.py | 2 +- .../prompts/generate_suggestions_by_file.py | 4 +- .../split_grading_instructions_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 7 files changed, 157 insertions(+), 125 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 0d6cc7e95..3507eded0 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -43,9 +43,9 @@ class SplitGradingInstructionsByFilePrompt(BaseModel): class GenerationPrompt(BaseModel): """\ Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, \ -**{submission}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}** +**{submission_file}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{template_to_solution_diff}** -*Note: Prompt will be applied per file independently, submission is a single file.*\ +*Note: Prompt will be applied per file independently. Also, you don't have to include all features, e.g. 
template_to_solution_diff.*\ """ system_message: str = Field(default=generate_suggestions_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -58,6 +58,7 @@ class BasicApproachConfig(BaseModel): max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_number_of_files: int = Field(default=25, description="Maximum number of files.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index bc93269eb..ad8daf47e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,17 +1,25 @@ from typing import List, Optional, Sequence - +import asyncio from pydantic import BaseModel, Field -from langchain.chains.openai_functions import create_structured_output_chain from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from athena.logger import logger from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed -from module_programming_llm.helpers.llm_utils import check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers +from module_programming_llm.split_grading_instructions_by_file import split_grading_instructions_by_file +from module_programming_llm.split_problem_statement_by_file import split_problem_statement_by_file +from module_programming_llm.helpers.llm_utils import ( + check_prompt_length_and_omit_features_if_necessary, + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + predict_and_parse, +) +from module_programming_llm.helpers.utils import( + get_diff, + load_files_from_repo, + add_line_numbers, + get_programming_language_file_extension +) class FeedbackModel(BaseModel): @@ -38,13 +46,26 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() - # Get split grading instructions - split_grading_instructions = generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=config, debug=debug) - file_grading_instructions = { item.file_name: item.grading_instructions for item in split_grading_instructions.instructions } + # Get split problem statement and grading instructions by file (if necessary) + split_problem_statement, split_grading_instructions = await asyncio.gather( + split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), + 
split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + ) + + is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split + file_problem_statements = { + item.file_name: item.problem_statement + for item in split_problem_statement.file_problem_statements + } if split_problem_statement is not None else {} - # Get split problem statement - split_problem_statement = generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=config, debug=debug) - file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.problem_statements } + is_short_grading_instructions = ( + num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split + if exercise.grading_instructions is not None else True + ) + file_grading_instructions = { + item.file_name: item.grading_instructions + for item in split_grading_instructions.file_grading_instructions + } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] @@ -53,33 +74,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio template_repo = exercise.get_template_repository() submission_repo = submission.get_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) - if file_extension is None: - raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - - files = load_files_from_repo( + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") + + # Changed text files + changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x.endswith(file_extension) if file_extension else False + file_filter=lambda x: x in changed_files_from_template_to_submission ) - for file_path, content in files.items(): - if content is None: - continue - - problem_statement = file_problem_statements.get(file_path, "No relevant problem statement section found.") - grading_instructions = file_grading_instructions.get(file_path, "No relevant grading instructions found.") - - content = add_line_numbers(content) - solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) - template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) + for file_path, file_content in changed_files.items(): + problem_statement = ( + exercise.problem_statement if is_short_problem_statement + else file_problem_statements.get(file_path, "No relevant problem statement section found.") + ) + problem_statement = problem_statement if problem_statement.strip() else "No problem statement found." + + grading_instructions = ( + exercise.grading_instructions or "" if is_short_grading_instructions + else file_grading_instructions.get(file_path, "No relevant grading instructions found.") + ) + grading_instructions = grading_instructions if grading_instructions.strip() else "No grading instructions found." 
+ + file_content = add_line_numbers(file_content) + solution_to_submission_diff = get_diff( + src_repo=solution_repo, + dst_repo=submission_repo, + src_prefix="solution", + dst_prefix="submission", + file_path=file_path + ) + template_to_submission_diff = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + src_prefix="template", + dst_prefix="submission", + file_path=file_path + ) + template_to_solution_diff = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + src_prefix="template", + dst_prefix="solution", + file_path=file_path + ) prompt_inputs.append({ "file_path": file_path, - "submission": content, + "priority": len(template_to_solution_diff), + "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, "solution_to_submission_diff": solution_to_submission_diff, "template_to_submission_diff": template_to_submission_diff, + "template_to_solution_diff": template_to_solution_diff, "grading_instructions": grading_instructions, "problem_statement": problem_statement, }) @@ -93,11 +145,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio # Filter long prompts (omitting features if necessary) omittable_features = [ + "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", + "solution_to_submission_diff", "template_to_submission_diff", - "solution_to_submission_diff" ] + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it + prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in [check_prompt_length_and_omit_features_if_necessary( @@ -110,70 +166,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio if should_run ] - chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) - if not prompt_inputs: - return [] - result = await chain.agenerate(prompt_inputs) - - logger.info("Generated result: %s ", result) - - return [] - # return predict_and_parse( - # model=model, - # chat_prompt=chat_prompt, - # prompt_input={ - # "grading_instructions": exercise.grading_instructions, - # "changed_files": changed_files - # }, - # pydantic_object=SplitGradingInstructions - # ) - - - - - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# 
id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file + # If we have many files we need to filter and prioritize them + if len(prompt_inputs) > config.max_number_of_files: + programming_language_extension = get_programming_language_file_extension(programming_language=exercise.programming_language) + + # Prioritize files that have a diff between solution and submission + prompt_inputs = sorted( + prompt_inputs, + key=lambda x: x["priority"], + reverse=True + ) + + filtered_prompt_inputs = [] + if programming_language_extension is not None: + filtered_prompt_inputs = [ + prompt_input + for prompt_input in prompt_inputs + if prompt_input["file_path"].endswith(programming_language_extension) + ] + + while len(filtered_prompt_inputs) < config.max_number_of_files and prompt_inputs: + filtered_prompt_inputs.append(prompt_inputs.pop(0)) + prompt_inputs = filtered_prompt_inputs + + results: List[AssessmentModel] = await asyncio.gather(*[ + predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=AssessmentModel + ) for prompt_input in prompt_inputs + ]) + + if debug: + emit_meta( + "generate_suggestions", [ + { + "file_path": prompt_input["file_path"], + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() + } + for prompt_input, result in zip(prompt_inputs, results) + ] + ) + + feedbacks: List[Feedback] = [] + for prompt_input, result in zip(prompt_inputs, results): + file_path = prompt_input["file_path"] + for feedback in result.feedbacks: + feedbacks.append(Feedback( + exercise_id=exercise.id, + submission_id=submission.id, + title=feedback.title, + description=feedback.description, + file_path=file_path, + line_start=feedback.line_start, + line_end=feedback.line_end, + credits=feedback.credits, + meta={} + )) + + return feedbacks diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 53a300f00..b59ca9dab 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): """Predicts and parses the output of the model Args: @@ -131,22 +131,3 @@ def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, chain = LLMChain(llm=model, prompt=chat_prompt) output = chain.run(**prompt_input) return output_parser.parse(output) - - -async def agenerate_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): - """Generates and parses the output of the model - - Args: - model (BaseLanguageModel): The model to generate with - chat_prompt (ChatPromptTemplate): Prompt to use - prompt_input (dict): Input parameters to use for the prompt - pydantic_object (Type[T]): Pydantic model to parse the output - """ - if supports_function_calling(model): - 
chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/utils.py b/module_programming_llm/module_programming_llm/helpers/utils.py index 8f6c67ab8..894f4a6a6 100644 --- a/module_programming_llm/module_programming_llm/helpers/utils.py +++ b/module_programming_llm/module_programming_llm/helpers/utils.py @@ -9,7 +9,7 @@ from langchain.document_loaders import GitLoader -def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, Optional[str]]: +def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, str]: return { doc.metadata['file_path']: doc.page_content for doc in GitLoader(repo_path=str(repo.working_tree_dir), file_filter=file_filter).load() diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index f1a300ae0..2e6b27059 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -16,8 +16,8 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission to grade (with line numbers : ): -{submission} +Student\'s submission file to grade (with line numbers : ): +{submission_file} Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 8d45b597f..cb1f4a29f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -84,7 +84,7 @@ async def split_grading_instructions_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_grading_instructions = predict_and_parse( + split_grading_instructions = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1000ad245..28df233bf 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -83,7 +83,7 @@ async def split_problem_statement_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_problem_statement = predict_and_parse( + split_problem_statement = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, From 33cc686518d018dd768bcdfb89444451bf857354 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 23:46:16 +0200 Subject: [PATCH 24/51] fix spacing --- .../module_programming_llm/generate_suggestions_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index ad8daf47e..22240d708 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -14,7 +14,7 @@ num_tokens_from_string, predict_and_parse, ) -from module_programming_llm.helpers.utils import( +from module_programming_llm.helpers.utils import ( get_diff, load_files_from_repo, add_line_numbers, From 01e736425e535dfc2793dd41f60de04e841d0a73 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 14:02:22 +0200 Subject: [PATCH 25/51] reorder imports --- .../module_text_llm/suggest_feedback_basic.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/module_text_llm/module_text_llm/suggest_feedback_basic.py b/module_text_llm/module_text_llm/suggest_feedback_basic.py index 96d0d258d..95573fc96 100644 --- a/module_text_llm/module_text_llm/suggest_feedback_basic.py +++ b/module_text_llm/module_text_llm/suggest_feedback_basic.py @@ -1,4 +1,5 @@ from typing import List, Optional, Sequence +from pydantic import BaseModel, Field from langchain.chat_models import ChatOpenAI from langchain.chains import LLMChain @@ -9,20 +10,15 @@ ) from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.schema.output_parser import OutputParserException +from langchain.chains.openai_functions import create_structured_output_chain from athena import emit_meta from athena.text import Exercise, Submission, Feedback from athena.logger import logger -from pydantic import BaseModel, Field -from module_text_llm.config import BasicApproachConfig +from module_text_llm.config import BasicApproachConfig from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, num_tokens_from_string - -from langchain.chains.openai_functions import ( - create_structured_output_chain, -) - class FeedbackModel(BaseModel): title: str = Field(..., description="Very short title, i.e. feedback category", example="Logic Error") description: str = Field(..., description="Feedback description") From d129645a8cb1efec48cf31d58d2edde8cf21a3f1 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:03:33 +0200 Subject: [PATCH 26/51] update name --- module_text_llm/module_text_llm/__main__.py | 6 +++--- .../{suggest_feedback_basic.py => generate_suggestions.py} | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename module_text_llm/module_text_llm/{suggest_feedback_basic.py => generate_suggestions.py} (97%) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 457ff59e7..e9bf8d448 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -8,7 +8,7 @@ from athena.logger import logger from module_text_llm.config import Configuration -from .suggest_feedback_basic import suggest_feedback_basic +from module_text_llm.generate_suggestions import generate_suggestions @submissions_consumer @@ -24,13 +24,13 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm @feedback_consumer def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]): - logger.info("process_feedback: Received feedbacks for submission %d of exercise %d.", submission.id, exercise.id) + logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id) @feedback_provider async def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]: logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id, exercise.id) - return await suggest_feedback_basic(exercise, submission, module_config.approach, module_config.debug) + return await generate_suggestions(exercise, submission, module_config.approach, module_config.debug) if __name__ == "__main__": diff --git a/module_text_llm/module_text_llm/suggest_feedback_basic.py b/module_text_llm/module_text_llm/generate_suggestions.py similarity index 97% rename from module_text_llm/module_text_llm/suggest_feedback_basic.py rename to module_text_llm/module_text_llm/generate_suggestions.py index 95573fc96..519de9e01 100644 --- a/module_text_llm/module_text_llm/suggest_feedback_basic.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -68,7 +68,7 @@ def check_token_length_and_omit_from_input_if_necessary(prompt: ChatPromptTempla # pylint: disable-msg=too-many-locals -async def suggest_feedback_basic(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: +async def generate_suggestions(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() prompt_input = { From 7125521779487e484d4fcc2be7bbb4d26a6abaf0 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:06:35 +0200 Subject: [PATCH 27/51] rename and improve docs --- module_text_llm/module_text_llm/config.py | 11 ++++++----- ...gest_feedback_basic.py => generate_suggestions.py} | 0 2 files changed, 6 insertions(+), 5 deletions(-) rename module_text_llm/module_text_llm/prompts/{suggest_feedback_basic.py => generate_suggestions.py} (100%) diff --git a/module_text_llm/module_text_llm/config.py b/module_text_llm/module_text_llm/config.py index 8cfbb9dc8..8598ee8de 100644 --- a/module_text_llm/module_text_llm/config.py +++ b/module_text_llm/module_text_llm/config.py @@ -2,13 +2,14 @@ from athena import config_schema_provider from module_text_llm.helpers.models import ModelConfigType, DefaultModelConfig -from .prompts.suggest_feedback_basic import system_template, human_template +from module_text_llm.prompts.generate_suggestions import system_template, human_template -class BasicPrompt(BaseModel): +class GenerateSuggestionsPrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{submission}**, **{max_points}**, **{bonus_points}** -**{problem_statement}** or **{example_solution}** might be omitted if the input is too long.\ +Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}** + +_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._\ """ system_message: str = Field(default=system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -20,7 +21,7 @@ class BasicApproachConfig(BaseModel): """This approach uses a LLM with a single prompt to generate feedback in a single step.""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - prompt: BasicPrompt = Field(default=BasicPrompt()) + prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt()) @config_schema_provider diff --git a/module_text_llm/module_text_llm/prompts/suggest_feedback_basic.py b/module_text_llm/module_text_llm/prompts/generate_suggestions.py similarity index 100% rename from module_text_llm/module_text_llm/prompts/suggest_feedback_basic.py rename to module_text_llm/module_text_llm/prompts/generate_suggestions.py From 4d784fa2c825444c75f00e8ac2d3310b7d35db6d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:07:17 +0200 Subject: [PATCH 28/51] change naming --- module_text_llm/module_text_llm/config.py | 2 +- module_text_llm/module_text_llm/generate_suggestions.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/module_text_llm/module_text_llm/config.py b/module_text_llm/module_text_llm/config.py index 8598ee8de..17519aea5 100644 --- a/module_text_llm/module_text_llm/config.py +++ b/module_text_llm/module_text_llm/config.py @@ -21,7 +21,7 @@ class BasicApproachConfig(BaseModel): """This approach uses a LLM with a single prompt to generate feedback in a single step.""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt()) + generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt()) @config_schema_provider diff --git a/module_text_llm/module_text_llm/generate_suggestions.py b/module_text_llm/module_text_llm/generate_suggestions.py index 519de9e01..0fc34277d 100644 --- a/module_text_llm/module_text_llm/generate_suggestions.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -87,13 +87,13 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi # Prepare prompt if supports_function_calling: - system_message_prompt = SystemMessagePromptTemplate.from_template(config.prompt.system_message) - human_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.human_message) + system_message_prompt = SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message) + human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message) else: - system_message_prompt = SystemMessagePromptTemplate.from_template(config.prompt.system_message + "\n{format_instructions}") + system_message_prompt = SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message + "\n{format_instructions}") system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.human_message + "\nJSON Response:") + human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message + "\nJSON Response:") chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) prompt_input, should_run = check_token_length_and_omit_from_input_if_necessary(chat_prompt, prompt_input, config.max_input_tokens, debug) From 41602c77b7b880540d76b5c7e63fd4eb17b21fb9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:28:23 +0200 Subject: [PATCH 29/51] refactor --- .../module_text_llm/generate_suggestions.py | 107 ++++---------- .../module_text_llm/helpers/llm_utils.py | 134 ++++++++++++++++++ 2 files changed, 165 insertions(+), 76 deletions(-) create mode 100644 module_text_llm/module_text_llm/helpers/llm_utils.py diff --git a/module_text_llm/module_text_llm/generate_suggestions.py b/module_text_llm/module_text_llm/generate_suggestions.py index 0fc34277d..b8110be1e 100644 --- a/module_text_llm/module_text_llm/generate_suggestions.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -1,22 +1,17 @@ from typing import List, Optional, Sequence from pydantic import BaseModel, Field -from langchain.chat_models import ChatOpenAI -from langchain.chains import LLMChain -from langchain.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser -from langchain.schema.output_parser import OutputParserException -from langchain.chains.openai_functions import create_structured_output_chain - from athena import emit_meta from athena.text import Exercise, Submission, Feedback from athena.logger import logger from module_text_llm.config import BasicApproachConfig +from module_text_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + check_prompt_length_and_omit_features_if_necessary, + num_tokens_from_prompt, + predict_and_parse +) from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, num_tokens_from_string class FeedbackModel(BaseModel): @@ -39,35 +34,6 @@ class Config: title = "Assessment" -def check_token_length_and_omit_from_input_if_necessary(prompt: ChatPromptTemplate, prompt_input, max_input_tokens: int, debug: bool): - if num_tokens_from_string(prompt.format(**prompt_input)) <= max_input_tokens: - return prompt_input, True - - omitted_features = [] - - # Input is too long -> Try to omit example_solution - if "example_solution" in prompt_input: - prompt_input["example_solution"] = "omitted" - omitted_features.append("example_solution") - if num_tokens_from_string(prompt.format(**prompt_input)) <= max_input_tokens: - if debug: - emit_meta("omitted_features", omitted_features) - return prompt_input, True - - # Input is still too long -> Try to omit problem_statement - if "problem_statement" in prompt_input: - prompt_input["problem_statement"] = "omitted" - omitted_features.append("problem_statement") - if num_tokens_from_string(prompt.format(**prompt_input)) <= max_input_tokens: - if debug: - emit_meta("omitted_features", omitted_features) - return prompt_input, True - - # Input is still too long -> Model should not run - return prompt_input, False - - -# pylint: disable-msg=too-many-locals async def generate_suggestions(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() @@ -80,46 +46,37 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi "submission": add_sentence_numbers(submission.text) } - supports_function_calling = isinstance(model, ChatOpenAI) - - # Output parser for non-function-calling models - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=AssessmentModel), llm=model) - - # Prepare prompt - if supports_function_calling: - system_message_prompt = 
SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message) - human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message) - else: - system_message_prompt = SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message + "\n{format_instructions}") - system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} - system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message + "\nJSON Response:") - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - - prompt_input, should_run = check_token_length_and_omit_from_input_if_necessary(chat_prompt, prompt_input, config.max_input_tokens, debug) + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_prompt.system_message, + human_message=config.generate_suggestions_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Check if the prompt is too long and omit features if necessary (in order of importance) + omittable_features = ["example_solution", "problem_statement", "grading_instructions"] + prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary( + prompt=chat_prompt, + prompt_input= prompt_input, + max_input_tokens=config.max_input_tokens, + omittable_features=omittable_features, + debug=debug + ) + + # Skip if the prompt is too long if not should_run: logger.warning("Input too long. Skipping.") if debug: emit_meta("prompt", chat_prompt.format(**prompt_input)) - emit_meta("error", "Input too long. 
Skipping.") - - # Return early since we cannot run the model + emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}") return [] - if supports_function_calling: - chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) - result = chain.run(**prompt_input) - else: - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - - try: - result = output_parser.parse(output) - except OutputParserException as e: - logger.warning("Could not parse and fix output: %s", e) - result = AssessmentModel(feedbacks=[]) - if debug: - emit_meta("parsing_error", output) + result = predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=AssessmentModel + ) if debug: emit_meta("prompt", chat_prompt.format(**prompt_input)) @@ -128,8 +85,6 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi for feedback in result.feedbacks: index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text) feedbacks.append(Feedback( - id=None, - grading_instruction_id=None, exercise_id=exercise.id, submission_id=submission.id, title=feedback.title, diff --git a/module_text_llm/module_text_llm/helpers/llm_utils.py b/module_text_llm/module_text_llm/helpers/llm_utils.py new file mode 100644 index 000000000..1cfd646e6 --- /dev/null +++ b/module_text_llm/module_text_llm/helpers/llm_utils.py @@ -0,0 +1,134 @@ +from typing import Type, TypeVar, List +from pydantic import BaseModel + +import tiktoken + +from langchain.chains import LLMChain +from langchain.chat_models import ChatOpenAI +from langchain.base_language import BaseLanguageModel +from langchain.prompts import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) +from langchain.output_parsers import PydanticOutputParser, OutputFixingParser +from langchain.chains.openai_functions import create_structured_output_chain + +from athena import emit_meta + + +T = TypeVar("T", bound=BaseModel) + + +def num_tokens_from_string(string: str) -> int: + """Returns the number of tokens in a text string.""" + encoding = tiktoken.get_encoding("cl100k_base") + num_tokens = len(encoding.encode(string)) + return num_tokens + + +def num_tokens_from_prompt(chat_prompt: ChatPromptTemplate, prompt_input: dict) -> int: + """Returns the number of tokens in a chat prompt.""" + return num_tokens_from_string(chat_prompt.format(**prompt_input)) + + +def check_prompt_length_and_omit_features_if_necessary(prompt: ChatPromptTemplate, + prompt_input: dict, + max_input_tokens: int, + omittable_features: List[str], + debug: bool): + """Check if the input is too long and omit features if necessary. 
+ + Note: Omitted features will be replaced with "omitted" in the prompt + + Args: + prompt (ChatPromptTemplate): Prompt template + prompt_input (dict): Prompt input + max_input_tokens (int): Maximum number of tokens allowed + omittable_features (List[str]): List of features that can be omitted, ordered by priority (least important first) + debug (bool): Debug flag + + Returns: + (dict, bool): Tuple of (prompt_input, should_run) where prompt_input is the input with omitted features and + should_run is True if the model should run, False otherwise + """ + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + return prompt_input, True + + omitted_features = [] + + # Omit features until the input is short enough + for feature in omittable_features: + if feature in prompt_input: + omitted_features.append(feature) + prompt_input[feature] = "omitted" + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + if debug: + emit_meta("omitted_features", omitted_features) + return prompt_input, True + + # If we get here, we couldn't omit enough features + return prompt_input, False + + +def supports_function_calling(model: BaseLanguageModel): + """Returns True if the model supports function calling, False otherwise + + Args: + model (BaseLanguageModel): The model to check + + Returns: + boolean: True if the model supports function calling, False otherwise + """ + return isinstance(model, ChatOpenAI) + + +def get_chat_prompt_with_formatting_instructions( + model: BaseLanguageModel, + system_message: str, + human_message: str, + pydantic_object: Type[T] + ) -> ChatPromptTemplate: + """Returns a ChatPromptTemplate with formatting instructions (if necessary) + + Note: Does nothing if the model supports function calling + + Args: + model (BaseLanguageModel): The model to check if it supports function calling + system_message (str): System message + human_message (str): Human message + pydantic_object (Type[T]): Model to parse the output + + Returns: + ChatPromptTemplate: ChatPromptTemplate with formatting instructions (if necessary) + """ + if supports_function_calling(model): + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message) + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message) + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") + system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} + system_message_prompt.prompt.input_variables.remove("format_instructions") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON Response:") + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + +def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): + """Predicts and parses the output of the model + + Args: + model (BaseLanguageModel): The model to predict with + chat_prompt (ChatPromptTemplate): Prompt to use + prompt_input (dict): Input parameters to use for the prompt + pydantic_object (Type[T]): Pydantic model to parse the output + """ + if supports_function_calling(model): + chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) + return chain.run(**prompt_input) + + 
output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) + chain = LLMChain(llm=model, prompt=chat_prompt) + output = chain.run(**prompt_input) + return output_parser.parse(output) From 51f11a1d76d8e9c123450e48b03cd7aa648c1352 Mon Sep 17 00:00:00 2001 From: Paul Schwind Date: Sat, 19 Aug 2023 17:11:52 +0200 Subject: [PATCH 30/51] Fix: Allow ID autoincrement in SQLite (#82) --- .../models/big_integer_with_autoincrement.py | 16 ++++++++++++++++ athena/athena/models/db_exercise.py | 5 +++-- athena/athena/models/db_feedback.py | 3 ++- athena/athena/models/db_programming_feedback.py | 7 ++++--- .../athena/models/db_programming_submission.py | 5 +++-- athena/athena/models/db_submission.py | 6 ++++-- athena/athena/models/db_text_feedback.py | 7 ++++--- athena/athena/models/db_text_submission.py | 5 +++-- 8 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 athena/athena/models/big_integer_with_autoincrement.py diff --git a/athena/athena/models/big_integer_with_autoincrement.py b/athena/athena/models/big_integer_with_autoincrement.py new file mode 100644 index 000000000..e9b9ce168 --- /dev/null +++ b/athena/athena/models/big_integer_with_autoincrement.py @@ -0,0 +1,16 @@ +""" +SQLAlchemy + SQLite does not support the autoincrement feature for BigInteger columns. +This file provides a class as a workaround for this problem: +It uses a normal Integer column in SQLite and a BigInteger column otherwise. +SQLite Integer columns can autoincrement, but they are limited to 2^63-1. +See https://stackoverflow.com/a/23175518/4306257 for more information. +""" + +from sqlalchemy import BigInteger +from sqlalchemy.dialects import postgresql, mysql, sqlite + +# Solution from https://stackoverflow.com/a/23175518/4306257 +BigIntegerWithAutoincrement = BigInteger() +BigIntegerWithAutoincrement = BigIntegerWithAutoincrement.with_variant(postgresql.BIGINT(), 'postgresql') +BigIntegerWithAutoincrement = BigIntegerWithAutoincrement.with_variant(mysql.BIGINT(), 'mysql') +BigIntegerWithAutoincrement = BigIntegerWithAutoincrement.with_variant(sqlite.INTEGER(), 'sqlite') \ No newline at end of file diff --git a/athena/athena/models/db_exercise.py b/athena/athena/models/db_exercise.py index 888daaf6d..9a2ea929a 100644 --- a/athena/athena/models/db_exercise.py +++ b/athena/athena/models/db_exercise.py @@ -1,11 +1,12 @@ -from sqlalchemy import Column, BigInteger, String, Float, JSON, Enum as SqlEnum +from sqlalchemy import Column, String, Float, JSON, Enum as SqlEnum from athena.schemas import ExerciseType from .model import Model +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBExercise(Model): - id = Column(BigInteger, primary_key=True, index=True, nullable=False) + id = Column(BigIntegerWithAutoincrement, primary_key=True, index=True, nullable=False) title = Column(String, index=True, nullable=False) type = Column(SqlEnum(ExerciseType), index=True, nullable=False) max_points = Column(Float, index=True, nullable=False) diff --git a/athena/athena/models/db_feedback.py b/athena/athena/models/db_feedback.py index 94bb9a33d..95a07836f 100644 --- a/athena/athena/models/db_feedback.py +++ b/athena/athena/models/db_feedback.py @@ -1,12 +1,13 @@ from sqlalchemy import Column, BigInteger, Boolean, String, Float, JSON, UniqueConstraint from .model import Model +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBFeedback(Model): __table_args__ = (UniqueConstraint('lms_id'),) - id = 
Column(BigInteger, primary_key=True, index=True, autoincrement=True) + id = Column(BigIntegerWithAutoincrement, primary_key=True, index=True, autoincrement=True) lms_id = Column(BigInteger) title = Column(String) description = Column(String) diff --git a/athena/athena/models/db_programming_feedback.py b/athena/athena/models/db_programming_feedback.py index b096f9ece..94e855f2d 100644 --- a/athena/athena/models/db_programming_feedback.py +++ b/athena/athena/models/db_programming_feedback.py @@ -1,11 +1,12 @@ from typing import cast, Optional from athena.schemas.programming_submission import ProgrammingSubmission -from sqlalchemy import Column, Integer, BigInteger, String, ForeignKey +from sqlalchemy import Column, Integer, String, ForeignKey from sqlalchemy.orm import relationship from athena.database import Base, get_db from .db_programming_submission import DBProgrammingSubmission from .db_feedback import DBFeedback +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBProgrammingFeedback(DBFeedback, Base): @@ -15,8 +16,8 @@ class DBProgrammingFeedback(DBFeedback, Base): line_start: Optional[int] = Column(Integer) # type: ignore line_end: Optional[int] = Column(Integer) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) - submission_id = Column(BigInteger, ForeignKey("programming_submissions.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) + submission_id = Column(BigIntegerWithAutoincrement, ForeignKey("programming_submissions.id", ondelete="CASCADE"), index=True) exercise = relationship("DBProgrammingExercise", back_populates="feedbacks") submission = relationship("DBProgrammingSubmission", back_populates="feedbacks") diff --git a/athena/athena/models/db_programming_submission.py b/athena/athena/models/db_programming_submission.py index 607ef8e0c..9dedc1809 100644 --- a/athena/athena/models/db_programming_submission.py +++ b/athena/athena/models/db_programming_submission.py @@ -1,15 +1,16 @@ -from sqlalchemy import ForeignKey, BigInteger, Column, String +from sqlalchemy import ForeignKey, Column, String from sqlalchemy.orm import relationship from athena.database import Base from .db_submission import DBSubmission +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBProgrammingSubmission(DBSubmission, Base): __tablename__ = "programming_submissions" repository_url: str = Column(String, nullable=False) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) exercise = relationship("DBProgrammingExercise", back_populates="submissions") feedbacks = relationship("DBProgrammingFeedback", back_populates="submission") diff --git a/athena/athena/models/db_submission.py b/athena/athena/models/db_submission.py index b9d55dd2c..c2e490345 100644 --- a/athena/athena/models/db_submission.py +++ b/athena/athena/models/db_submission.py @@ -1,7 +1,9 @@ -from sqlalchemy import Column, BigInteger, JSON +from sqlalchemy import Column, JSON + from .model import Model +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBSubmission(Model): - id = Column(BigInteger, primary_key=True, index=True, nullable=False) + id = Column(BigIntegerWithAutoincrement, 
primary_key=True, index=True, autoincrement=True,) meta = Column(JSON, nullable=False) diff --git a/athena/athena/models/db_text_feedback.py b/athena/athena/models/db_text_feedback.py index ca31ec4ed..309f4fd28 100644 --- a/athena/athena/models/db_text_feedback.py +++ b/athena/athena/models/db_text_feedback.py @@ -1,10 +1,11 @@ from typing import Optional -from sqlalchemy import Column, Integer, BigInteger, ForeignKey +from sqlalchemy import Column, Integer, ForeignKey from sqlalchemy.orm import relationship from athena.database import Base from .db_feedback import DBFeedback +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBTextFeedback(DBFeedback, Base): @@ -13,8 +14,8 @@ class DBTextFeedback(DBFeedback, Base): index_start: Optional[int] = Column(Integer) # type: ignore index_end: Optional[int] = Column(Integer) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) - submission_id = Column(BigInteger, ForeignKey("text_submissions.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) + submission_id = Column(BigIntegerWithAutoincrement, ForeignKey("text_submissions.id", ondelete="CASCADE"), index=True) exercise = relationship("DBTextExercise", back_populates="feedbacks") submission = relationship("DBTextSubmission", back_populates="feedbacks") diff --git a/athena/athena/models/db_text_submission.py b/athena/athena/models/db_text_submission.py index b4bf90fea..742f6bbfe 100644 --- a/athena/athena/models/db_text_submission.py +++ b/athena/athena/models/db_text_submission.py @@ -1,8 +1,9 @@ -from sqlalchemy import ForeignKey, BigInteger, Column, String +from sqlalchemy import ForeignKey, Column, String from sqlalchemy.orm import relationship from athena.database import Base from .db_submission import DBSubmission +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBTextSubmission(DBSubmission, Base): @@ -10,7 +11,7 @@ class DBTextSubmission(DBSubmission, Base): text: str = Column(String, nullable=False) # type: ignore language: str = Column(String, nullable=True) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) exercise = relationship("DBTextExercise", back_populates="submissions") feedbacks = relationship("DBTextFeedback", back_populates="submission") From 174aa81bed9ea9d92d18a58939d6f4df5fba650f Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 00:06:29 +0200 Subject: [PATCH 31/51] fix stuff --- .../module_programming_llm/generate_suggestions_by_file.py | 7 ++++++- .../prompts/generate_suggestions_by_file.py | 3 --- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 22240d708..93dc67564 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,4 +1,5 @@ from typing import List, Optional, Sequence +import os import asyncio from pydantic import BaseModel, Field @@ -80,11 +81,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio file_path=None, name_only=True ).split("\n") + changed_files_from_template_to_submission = [ + os.path.join(str(submission_repo.working_tree_dir or ""), file_path) + for file_path in changed_files_from_template_to_submission + ] # Changed text files changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x in changed_files_from_template_to_submission + file_filter=lambda file_path: file_path in changed_files_from_template_to_submission ) for file_path, file_content in changed_files.items(): diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 2e6b27059..344d635d0 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -9,9 +9,6 @@ Problem statement: {problem_statement} -Example solution: -{example_solution} - Grading instructions: {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} From d6b1e8033e9114968d7f8f025f0fdd7afec2e5b0 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 09:41:39 +0200 Subject: [PATCH 32/51] add fixes --- module_programming_llm/module_programming_llm/config.py | 7 +++---- .../prompts/split_problem_statement_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3507eded0..3d9225a1f 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -22,23 +22,22 @@ class SplitProblemStatementsByFilePrompt(BaseModel): *Note: `changed_files` are the changed files between template and solution repository.*\ """ - tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, description="Message from a human. 
The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") + class SplitGradingInstructionsByFilePrompt(BaseModel): """\ Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ - tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, description="Message from a human. The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") class GenerationPrompt(BaseModel): """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index c06aea30e..b92710c07 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Problem statement by file: diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 28df233bf..1cb35f7c8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -69,7 +69,7 @@ async def split_problem_statement_by_file( chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_problem_statement_by_file_prompt.system_message, - human_message=config.split_problem_statement_by_file_prompt.system_message, + human_message=config.split_problem_statement_by_file_prompt.human_message, pydantic_object=SplitProblemStatement ) From 3a17659c8806590f35d6f7be9f9c039851aba0e3 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:51:13 +0200 Subject: [PATCH 33/51] more fixes --- .../generate_suggestions_by_file.py | 19 ++++++++++--------- .../split_grading_instructions_by_file.py | 4 ++-- .../split_problem_statement_by_file.py | 2 +- .../split_grading_instructions_by_file.py | 12 ++++++++++-- .../split_problem_statement_by_file.py | 8 ++++++++ 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 93dc67564..732ab557b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -47,10 +47,18 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_by_file_prompt.system_message, + human_message=config.generate_suggestions_by_file_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( - split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), - split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), + split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) ) is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split @@ -141,13 +149,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio "problem_statement": problem_statement, }) - chat_prompt = get_chat_prompt_with_formatting_instructions( - model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, - pydantic_object=AssessmentModel - ) - # Filter long prompts (omitting features if necessary) omittable_features = [ "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 2f181609f..54f2872f6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file to make it simpler. +Restructure the grading instructions by student changed file to make it simpler. 
""" human_template = """\ @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Grading instructions by file: diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index b92710c07..397e34893 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the problem statement by changed file. +Restructure the problem statement by student changed file to make it simpler. """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index cb1f4a29f..97903d809 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitGradingInstructions(BaseModel): async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitGradingInstructions]: @@ -36,6 +39,7 @@ async def split_grading_instructions_by_file( Args: exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) submission (Submission): Submission to split the grading instructions for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for grading_instructions config (BasicApproachConfig): Configuration Returns: @@ -44,7 +48,11 @@ async def split_grading_instructions_by_file( # Return None if the grading instructions are too short if (exercise.grading_instructions is None - or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + or num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split): + return None + + # Return None if the grading instructions are not in the prompt + if "grading_instructions" not in prompt.input_variables: return None model = config.model.get_model() @@ -92,7 +100,7 @@ async def split_grading_instructions_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), "result": split_grading_instructions.dict() }) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1cb35f7c8..4790f8ce8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ 
b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitProblemStatement(BaseModel): async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitProblemStatement]: @@ -36,6 +39,7 @@ async def split_problem_statement_by_file( Args: exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) submission (Submission): Submission to split the problem statement for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for problem_statement config (BasicApproachConfig): Configuration Returns: @@ -45,6 +49,10 @@ async def split_problem_statement_by_file( # Return None if the problem statement is too short if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: return None + + # Return None if the problem statement is not in the prompt + if "problem_statement" not in prompt.input_variables: + return None model = config.model.get_model() From f7d080949c9daa30a036d83b828f58e87fd96354 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 09:51:29 +0200 Subject: [PATCH 34/51] remove empty line --- .../module_programming_llm/generate_suggestions_by_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 732ab557b..c3cd1201e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -54,7 +54,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio pydantic_object=AssessmentModel ) - # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) From 3f4e07dd857242cbea14a23bba11ea0103c39597 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:53:30 +0200 Subject: [PATCH 35/51] delete unuse --- .../basic/basic_feedback_provider.py | 110 ------------------ 1 file changed, 110 deletions(-) delete mode 100644 module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py diff --git a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py deleted file mode 100644 index 134fd0e41..000000000 --- a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py +++ /dev/null @@ -1,110 +0,0 @@ -# import json -# from typing import List - -# from langchain.chains import LLMChain -# from langchain.prompts import ( -# ChatPromptTemplate, -# SystemMessagePromptTemplate, -# HumanMessagePromptTemplate, -# ) - -# from athena.programming import Exercise, Submission, Feedback -# from athena.logger import logger - -# from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers -# from module_programming_llm.helpers.models import chat - -# from ..prompts.basic_feedback_provider import system_template, human_template - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: -# max_prompt_length = 2560 -# input_list: List[dict] = [] - -# if exercise.meta['file_grading_instructions'] is None: -# raise ValueError("No file grading instructions found for exercise in meta.") -# if exercise.meta['file_problem_statements'] is None: -# raise ValueError("No file problem statements found for exercise in meta.") - -# # Feature extraction -# solution_repo = exercise.get_solution_repository() -# template_repo = exercise.get_template_repository() -# submission_repo = submission.get_repository() - -# file_extension = get_programming_language_file_extension(exercise.programming_language) -# if file_extension is None: -# raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - -# for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): -# if submission_content is None: -# continue - -# problem_statement = exercise.meta['file_problem_statements'].get(file_path) -# if problem_statement is None: -# logger.info("No problem statement for %s, skipping.", file_path) -# continue - -# grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) -# if grading_instructions is None: -# logger.info("No grading instructions for %s, skipping.", file_path) -# continue - -# submission_content = add_line_numbers(submission_content) -# solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) -# template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) - -# input_list.append({ -# "file_path": file_path, -# "submission_content": submission_content, -# "solution_to_submission_diff": solution_to_submission_diff, -# "template_to_submission_diff": template_to_submission_diff, -# "grading_instructions": grading_instructions, -# "problem_statement": problem_statement, -# }) - -# system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) -# human_message_prompt = 
HumanMessagePromptTemplate.from_template(human_template) -# chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file From 1025769a55574d251f256f10400b780567cbcebf Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:09:08 +0200 Subject: [PATCH 36/51] add small changes --- .../module_programming_llm/__main__.py | 1 + .../module_programming_llm/config.py | 13 +++++++------ .../generate_suggestions_by_file.py | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index 786d9d824..556313bf6 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -33,5 +33,6 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, module_co if __name__ == "__main__": + # Preload for token estimation later tiktoken.get_encoding("cl100k_base") app.start() diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3d9225a1f..743ddf693 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,9 +18,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -31,7 +29,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** +Features available: 
**{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") @@ -53,11 +51,14 @@ class GenerationPrompt(BaseModel): class BasicApproachConfig(BaseModel): - """This approach uses a LLM with a single prompt to generate feedback in a single step.""" + """\ +This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ +Then, it generates suggestions for each file independently.\ +""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - max_number_of_files: int = Field(default=25, description="Maximum number of files.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index c3cd1201e..179881f1b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -136,8 +136,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) prompt_inputs.append({ - "file_path": file_path, - "priority": len(template_to_solution_diff), + "file_path": file_path, # Not really relevant for the prompt + "priority": len(template_to_solution_diff), # Not really relevant for the prompt "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, @@ -149,15 +149,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio }) # Filter long prompts (omitting features if necessary) + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it omittable_features = [ - "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) + "template_to_solution_diff", # If it is even included in the prompt (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", "solution_to_submission_diff", - "template_to_submission_diff", + "template_to_submission_diff", # In the future we might indicate the changed lines in the submission_file additionally ] - # "submission_file" is not omittable, because it is the main input containing the line numbers - # In the future we might be able to include the line numbers in the diff, but for now we need to keep it prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in From 7215b73e5372bb6a74adc1dddae512207406581a Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:10:47 +0200 Subject: [PATCH 37/51] typo --- .../module_programming_llm/split_problem_statement_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 4790f8ce8..f2c1f0f1f 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -99,7 +99,7 @@ async def split_problem_statement_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), "result": split_problem_statement.dict() }) From 16a5ec19df9cb4d2516aff5286deff853667717a Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:30:33 +0200 Subject: [PATCH 38/51] add more fixes --- .../split_grading_instructions_by_file.py | 5 ++--- .../split_problem_statement_by_file.py | 5 ++--- .../split_grading_instructions_by_file.py | 20 +++++++++++++++++-- .../split_problem_statement_by_file.py | 20 +++++++++++++++++-- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 54f2872f6..21b754846 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the grading instructions by student changed file to make it simpler. +Your task is to restructure the grading instructions by student changed file to show a tutor \ +relevant instructions for each file. This should make it easier for the tutor to grade the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 397e34893..95dde8787 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the problem statement by student changed file to make it simpler. +Your task is to restructure the problem statement by student changed file to show the student \ +relevant information for each file. 
This should make it easier for the student to solve the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 97903d809..a565b7e0c 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -108,4 +108,20 @@ async def split_grading_instructions_by_file( if not split_grading_instructions.file_grading_instructions: return None + # Join duplicate file names (some responses contain multiple grading instructions for the same file) + file_grading_instructions_by_file_name = defaultdict(list) + for file_grading_instruction in split_grading_instructions.file_grading_instructions: + file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) + + split_grading_instructions.file_grading_instructions = [ + FileGradingInstruction( + file_name=file_name, + grading_instructions="\n".join( + file_grading_instruction.grading_instructions + for file_grading_instruction in file_grading_instructions + ) + ) + for file_name, file_grading_instructions in file_grading_instructions_by_file_name.items() + ] + return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index f2c1f0f1f..ccfc3533a 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -107,4 +107,20 @@ async def split_problem_statement_by_file( if not split_problem_statement.file_problem_statements: return None + # Join duplicate file names (some responses contain multiple problem statements for the same file) + file_problem_statements_by_file_name = defaultdict(list) + for file_problem_statement in split_problem_statement.file_problem_statements: + file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) + + split_problem_statement.file_problem_statements = [ + FileProblemStatement( + file_name=file_name, + problem_statement="\n".join( + file_problem_statement.problem_statement + for file_problem_statement in file_problem_statements + ) + ) + for file_name, file_problem_statements in file_problem_statements_by_file_name.items() + ] + return split_problem_statement From 9680c68715b7d2705b77ce45f79d3830a403e1ce Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:36:18 +0200 Subject: [PATCH 39/51] fix pydantic --- .../module_programming_llm/split_grading_instructions_by_file.py | 1 + .../module_programming_llm/split_problem_statement_by_file.py | 1 + 2 files changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index a565b7e0c..0b1a2a615 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -27,6 +27,7 @@ class SplitGradingInstructions(BaseModel): file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") +# pylint: disable=too-many-locals async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index ccfc3533a..cc72cd3f6 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -27,6 +27,7 @@ class SplitProblemStatement(BaseModel): file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") +# pylint: disable=too-many-locals async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, From 6b16d0d0830f51f986347388e7986f48615598b9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 11:56:38 +0200 Subject: [PATCH 40/51] small improvements --- .../generate_suggestions_by_file.py | 4 ++-- .../split_grading_instructions_by_file.py | 8 ++++---- .../split_problem_statement_by_file.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 179881f1b..e4d533b21 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -63,7 +63,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split file_problem_statements = { item.file_name: item.problem_statement - for item in split_problem_statement.file_problem_statements + for item in split_problem_statement.items } if split_problem_statement is not None else {} is_short_grading_instructions = ( @@ -72,7 +72,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) file_grading_instructions = { item.file_name: item.grading_instructions - for item in split_grading_instructions.file_grading_instructions + for item in split_grading_instructions.items } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0b1a2a615..388d7a865 100644 --- 
a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -24,7 +24,7 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + items: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") # pylint: disable=too-many-locals @@ -106,15 +106,15 @@ async def split_grading_instructions_by_file( "result": split_grading_instructions.dict() }) - if not split_grading_instructions.file_grading_instructions: + if not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) file_grading_instructions_by_file_name = defaultdict(list) - for file_grading_instruction in split_grading_instructions.file_grading_instructions: + for file_grading_instruction in split_grading_instructions.items: file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) - split_grading_instructions.file_grading_instructions = [ + split_grading_instructions.items = [ FileGradingInstruction( file_name=file_name, grading_instructions="\n".join( diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index cc72cd3f6..d423a8a48 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -24,7 +24,7 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + items: Sequence[FileProblemStatement] = Field(..., description="File problem statements") # pylint: disable=too-many-locals @@ -105,15 +105,15 @@ async def split_problem_statement_by_file( "result": split_problem_statement.dict() }) - if not split_problem_statement.file_problem_statements: + if not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) file_problem_statements_by_file_name = defaultdict(list) - for file_problem_statement in split_problem_statement.file_problem_statements: + for file_problem_statement in split_problem_statement.items: file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) - split_problem_statement.file_problem_statements = [ + split_problem_statement.items = [ FileProblemStatement( file_name=file_name, problem_statement="\n".join( From ac40ebef846495d90e9e374d8f3c60d4cbcc6d07 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
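After the rename to items, the caller in generate_suggestions_by_file consumes the field as a file-name to text mapping; a small self-contained sketch of that lookup with simplified stand-in models and an illustrative file name:

from typing import Sequence
from pydantic import BaseModel

class FileProblemStatement(BaseModel):
    file_name: str
    problem_statement: str

class SplitProblemStatement(BaseModel):
    items: Sequence[FileProblemStatement]

split_problem_statement = SplitProblemStatement(
    items=[FileProblemStatement(file_name="src/Main.java", problem_statement="Implement the main loop.")]
)
# Mapping used later to pick the relevant problem statement section per changed file.
file_problem_statements = {
    item.file_name: item.problem_statement
    for item in split_problem_statement.items
}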
Dietrich" Date: Sun, 20 Aug 2023 12:45:42 +0200 Subject: [PATCH 41/51] add final fixes --- .../generate_suggestions_by_file.py | 4 +-- .../helpers/llm_utils.py | 33 ++++++++++++------- .../prompts/generate_suggestions_by_file.py | 16 ++++----- .../split_grading_instructions_by_file.py | 4 +-- .../split_problem_statement_by_file.py | 4 +-- 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index e4d533b21..152fb6660 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -194,7 +194,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio filtered_prompt_inputs.append(prompt_inputs.pop(0)) prompt_inputs = filtered_prompt_inputs - results: List[AssessmentModel] = await asyncio.gather(*[ + results: List[Optional[AssessmentModel]] = await asyncio.gather(*[ predict_and_parse( model=model, chat_prompt=chat_prompt, @@ -209,7 +209,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio { "file_path": prompt_input["file_path"], "prompt": chat_prompt.format(**prompt_input), - "result": result.dict() + "result": result.dict() if result is not None else None } for prompt_input, result in zip(prompt_inputs, results) ] diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index b59ca9dab..394bdd2f7 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -1,6 +1,5 @@ -from typing import Type, TypeVar, List -from pydantic import BaseModel - +from typing import Optional, Type, TypeVar, List +from pydantic import BaseModel, ValidationError import tiktoken from langchain.chains import LLMChain @@ -11,8 +10,9 @@ SystemMessagePromptTemplate, HumanMessagePromptTemplate, ) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.chains.openai_functions import create_structured_output_chain +from langchain.output_parsers import PydanticOutputParser +from langchain.schema import OutputParserException from athena import emit_meta @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: """Predicts and parses the output of the model Args: @@ -122,12 +122,23 @@ async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTem chat_prompt (ChatPromptTemplate): Prompt to use prompt_input (dict): Input parameters to use for the prompt pydantic_object (Type[T]): Pydantic model to parse the output + + Returns: + Optional[T]: Parsed output, or None if it could not be parsed """ if supports_function_calling(model): chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - 
chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) + + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser) + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 344d635d0..7535fd244 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -1,11 +1,6 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -VERY IMPORTANT: Effective feedback for text assignments should be: -1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual\ -""" - -human_template = """\ Problem statement: {problem_statement} @@ -13,12 +8,17 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission file to grade (with line numbers : ): -{submission_file} - Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} +VERY IMPORTANT: Effective feedback for text assignments should be: +1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. 
Contextual\ +""" + +human_template = """\ Diff between template (deletions) and student\'s submission (additions): {template_to_submission_diff} + +Student\'s submission file to grade (with line numbers : ): +{submission_file} """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 388d7a865..4ba10cb54 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -103,10 +103,10 @@ async def split_grading_instructions_by_file( if debug: emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), - "result": split_grading_instructions.dict() + "result": split_grading_instructions.dict() if split_grading_instructions is not None else None }) - if not split_grading_instructions.items: + if split_grading_instructions is None or not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index d423a8a48..4a4761610 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -102,10 +102,10 @@ async def split_problem_statement_by_file( if debug: emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), - "result": split_problem_statement.dict() + "result": split_problem_statement.dict() if split_problem_statement is not None else None }) - if not split_problem_statement.items: + if split_problem_statement is None or not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) From 24d068c6ee7edf99849b142191c50f6e3ba9d687 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
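Since predict_and_parse now returns an Optional result, the per-file assessments gathered concurrently may contain None entries, which are skipped instead of failing the whole run. A self-contained sketch of that consumption pattern, with a stand-in predict function and illustrative file names:

import asyncio
from typing import List, Optional

async def predict_or_none(prompt_input: dict) -> Optional[dict]:
    # Stand-in for predict_and_parse: None means the LLM output could not be
    # parsed into the expected Pydantic model (OutputParserException/ValidationError).
    if prompt_input.get("parsable", True):
        return {"file_path": prompt_input["file_path"], "feedbacks": []}
    return None

async def main() -> None:
    prompt_inputs = [
        {"file_path": "src/Main.java"},
        {"file_path": "src/Util.java", "parsable": False},
    ]
    results: List[Optional[dict]] = await asyncio.gather(
        *[predict_or_none(prompt_input) for prompt_input in prompt_inputs]
    )
    for prompt_input, result in zip(prompt_inputs, results):
        if result is None:
            continue  # skip files whose assessment could not be parsed
        print(prompt_input["file_path"], result["feedbacks"])

asyncio.run(main())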
Dietrich" Date: Sun, 20 Aug 2023 12:48:51 +0200 Subject: [PATCH 42/51] fix parsing --- .../module_text_llm/generate_suggestions.py | 10 ++++-- .../module_text_llm/helpers/llm_utils.py | 34 ++++++++++++------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/module_text_llm/module_text_llm/generate_suggestions.py b/module_text_llm/module_text_llm/generate_suggestions.py index b8110be1e..2acb2c803 100644 --- a/module_text_llm/module_text_llm/generate_suggestions.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -71,7 +71,7 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}") return [] - result = predict_and_parse( + result = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, @@ -79,7 +79,13 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi ) if debug: - emit_meta("prompt", chat_prompt.format(**prompt_input)) + emit_meta("generate_suggestions", { + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() if result is not None else None + }) + + if result is None: + return [] feedbacks = [] for feedback in result.feedbacks: diff --git a/module_text_llm/module_text_llm/helpers/llm_utils.py b/module_text_llm/module_text_llm/helpers/llm_utils.py index 1cfd646e6..394bdd2f7 100644 --- a/module_text_llm/module_text_llm/helpers/llm_utils.py +++ b/module_text_llm/module_text_llm/helpers/llm_utils.py @@ -1,6 +1,5 @@ -from typing import Type, TypeVar, List -from pydantic import BaseModel - +from typing import Optional, Type, TypeVar, List +from pydantic import BaseModel, ValidationError import tiktoken from langchain.chains import LLMChain @@ -11,12 +10,12 @@ SystemMessagePromptTemplate, HumanMessagePromptTemplate, ) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.chains.openai_functions import create_structured_output_chain +from langchain.output_parsers import PydanticOutputParser +from langchain.schema import OutputParserException from athena import emit_meta - T = TypeVar("T", bound=BaseModel) @@ -115,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: """Predicts and parses the output of the model Args: @@ -123,12 +122,23 @@ def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, chat_prompt (ChatPromptTemplate): Prompt to use prompt_input (dict): Input parameters to use for the prompt pydantic_object (Type[T]): Pydantic model to parse the output + + Returns: + Optional[T]: Parsed output, or None if it could not be parsed """ if supports_function_calling(model): chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) + + try: + return await chain.arun(**prompt_input) + except 
(OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser) + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None From 67101e87d24c2f3c8a916e9d8ed28e7751420510 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 12:57:57 +0200 Subject: [PATCH 43/51] improve parsing --- .../module_programming_llm/helpers/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 394bdd2f7..82db286f2 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -110,7 +110,7 @@ def get_chat_prompt_with_formatting_instructions( system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON Response:") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON response following the provided schema:") return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) From 4c077af40742906753dd43e99387f90b1ebbf87b Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 12:59:42 +0200 Subject: [PATCH 44/51] add new line --- .../module_programming_llm/helpers/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 82db286f2..ab15edc60 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -110,7 +110,7 @@ def get_chat_prompt_with_formatting_instructions( system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON response following the provided schema:") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\n\nJSON response following the provided schema:") return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) From 2e96edfcfa2562369e3545d9414915315478611c Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
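The prompt helper touched in the last two patches injects the parser's format instructions into the system message and ends the human message with the new "JSON response following the provided schema:" cue. A condensed sketch of that construction for a simplified Assessment model; the real helper builds this from the configured system and human messages in llm_utils:

from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

class Assessment(BaseModel):
    summary: str = Field(description="One-sentence assessment summary")

output_parser = PydanticOutputParser(pydantic_object=Assessment)

# The schema instructions are pre-filled as a partial variable, so they are not
# treated as user input when the prompt is formatted later.
system_message_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        template="You are an AI tutor.\n{format_instructions}",
        input_variables=[],
        partial_variables={"format_instructions": output_parser.get_format_instructions()},
    )
)
human_message_prompt = HumanMessagePromptTemplate.from_template(
    "Submission:\n{submission}\n\nJSON response following the provided schema:"
)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])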
Dietrich" Date: Sun, 20 Aug 2023 13:11:49 +0200 Subject: [PATCH 45/51] fix type issue --- .../module_programming_llm/generate_suggestions_by_file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 152fb6660..56651afd6 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -218,6 +218,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio feedbacks: List[Feedback] = [] for prompt_input, result in zip(prompt_inputs, results): file_path = prompt_input["file_path"] + if result is None: + continue for feedback in result.feedbacks: feedbacks.append(Feedback( exercise_id=exercise.id, From ec4d36ebf2506fc2c21b4f059c941503bee0c920 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 27 Aug 2023 23:19:53 +0200 Subject: [PATCH 46/51] add prospector fixes --- .../module_programming_llm/generate_suggestions_by_file.py | 2 +- .../module_programming_llm/helpers/models/__init__.py | 6 ++++-- .../module_programming_llm/helpers/models/replicate.py | 7 ++++++- .../split_grading_instructions_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 56651afd6..160c72471 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -45,7 +45,7 @@ class Config: # pylint: disable=too-many-locals async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: - model = config.model.get_model() + model = config.model.get_model() # type: ignore[attr-defined] chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, diff --git a/module_programming_llm/module_programming_llm/helpers/models/__init__.py b/module_programming_llm/module_programming_llm/helpers/models/__init__.py index f5ab68a2f..f77d791cd 100644 --- a/module_programming_llm/module_programming_llm/helpers/models/__init__.py +++ b/module_programming_llm/module_programming_llm/helpers/models/__init__.py @@ -30,7 +30,9 @@ if 'DefaultModelConfig' not in globals(): DefaultModelConfig = types[0] +type0 = types[0] if len(types) == 1: - ModelConfigType = types[0] + ModelConfigType = type0 else: - ModelConfigType = Union[tuple(types)] # type: ignore + type1 = types[1] + ModelConfigType = Union[type0, type1] # type: ignore diff --git a/module_programming_llm/module_programming_llm/helpers/models/replicate.py b/module_programming_llm/module_programming_llm/helpers/models/replicate.py index a706b8247..af2e30fa5 100644 --- a/module_programming_llm/module_programming_llm/helpers/models/replicate.py +++ b/module_programming_llm/module_programming_llm/helpers/models/replicate.py @@ -10,6 +10,11 @@ # Hardcoded list of models +# If necessary, add more models from replicate here, the config below might need adjustments depending on the available +# parameters of the model +# +# To update the version of the models, go to the respective page on replicate.com and copy the (latest) version id +# from and paste it 
after the colon in the value of the dictionary. Ever so often a new version is released. replicate_models = { # LLAMA 2 70B Chat # https://replicate.com/replicate/llama-2-70b-chat @@ -23,7 +28,7 @@ } available_models = {} -if len(os.environ.get("REPLICATE_API_TOKEN") or "") > 0: +if os.environ.get("REPLICATE_API_TOKEN"): # If Replicate is available available_models = { name: Replicate( model=model, diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 4ba10cb54..695850ec6 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -56,7 +56,7 @@ async def split_grading_instructions_by_file( if "grading_instructions" not in prompt.input_variables: return None - model = config.model.get_model() + model = config.model.get_model() # type: ignore[attr-defined] template_repo = exercise.get_template_repository() solution_repo = exercise.get_solution_repository() diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 8892685fa..ae90443a8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -55,7 +55,7 @@ async def split_problem_statement_by_file( if "problem_statement" not in prompt.input_variables: return None - model = config.model.get_model() + model = config.model.get_model() # type: ignore[attr-defined] template_repo = exercise.get_template_repository() solution_repo = exercise.get_solution_repository() From 64a72a3d74372d210432d7f4c95944015d3adea5 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 27 Aug 2023 23:23:50 +0200 Subject: [PATCH 47/51] fix playground ui --- .../src/components/selectors/module_config_select/index.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/playground/src/components/selectors/module_config_select/index.tsx b/playground/src/components/selectors/module_config_select/index.tsx index 9434260a1..765088b1c 100644 --- a/playground/src/components/selectors/module_config_select/index.tsx +++ b/playground/src/components/selectors/module_config_select/index.tsx @@ -22,11 +22,12 @@ import ModuleLLMConfig from "./module_llm"; // 4. Add your component to the customModuleConfigComponents object // // Use ModuleLLMConfig as example. -type CustomModuleConfig = "module_text_llm"; +type CustomModuleConfig = "module_text_llm" | "module_programming_llm"; const customModuleConfigComponents: { [key in CustomModuleConfig]: React.FC; } = { module_text_llm: ModuleLLMConfig, + module_programming_llm: ModuleLLMConfig, }; type SetConfig = Dispatch>; From 2d710fd202f6c4fd47dc967d0ebf48b83d40163d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 27 Aug 2023 23:38:38 +0200 Subject: [PATCH 48/51] add problem statement fixes --- .../module_programming_llm/generate_suggestions_by_file.py | 5 +++-- .../split_problem_statement_by_file.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 160c72471..34fd4416b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -60,7 +60,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) ) - is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split + problem_statement_tokens = num_tokens_from_string(exercise.problem_statement or "") + is_short_problem_statement = problem_statement_tokens <= config.split_problem_statement_by_file_prompt.tokens_before_split file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.items @@ -101,7 +102,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio for file_path, file_content in changed_files.items(): problem_statement = ( - exercise.problem_statement if is_short_problem_statement + exercise.problem_statement or "" if is_short_problem_statement else file_problem_statements.get(file_path, "No relevant problem statement section found.") ) problem_statement = problem_statement if problem_statement.strip() else "No problem statement found." diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index ae90443a8..7aee29e4a 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -48,7 +48,7 @@ async def split_problem_statement_by_file( """ # Return None if the problem statement is too short - if num_tokens_from_string(exercise.problem_statement or "No problem statement.") <= config.split_problem_statement_by_file_prompt.tokens_before_split: + if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_by_file_prompt.tokens_before_split: return None # Return None if the problem statement not in the prompt @@ -83,7 +83,7 @@ async def split_problem_statement_by_file( ) prompt_input = { - "problem_statement": exercise.problem_statement, + "problem_statement": exercise.problem_statement or "No problem statement.", "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } From 5c922c8714e84bdb1b0c2417f6782109afbc5b3b Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 3 Sep 2023 19:31:34 +0200 Subject: [PATCH 49/51] add codellama --- .../module_programming_llm/helpers/models/replicate.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/module_programming_llm/module_programming_llm/helpers/models/replicate.py b/module_programming_llm/module_programming_llm/helpers/models/replicate.py index af2e30fa5..15eef0216 100644 --- a/module_programming_llm/module_programming_llm/helpers/models/replicate.py +++ b/module_programming_llm/module_programming_llm/helpers/models/replicate.py @@ -25,6 +25,12 @@ # LLaMA 2 7B Chat # https://replicate.com/a16z-infra/llama-2-7b-chat "llama-2-7b-chat": "a16z-infra/llama-2-7b-chat:7b0bfc9aff140d5b75bacbed23e91fd3c34b01a1e958d32132de6e0a19796e2c", + # CodeLLAMA 2 13B + # https://replicate.com/replicate/codellama-13b + "codellama-13b": "replicate/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db", + # CodeLLAMA 2 34B + # https://replicate.com/replicate/codellama-34b + "codellama-34b": "replicate/codellama-34b:0666717e5ead8557dff55ee8f11924b5c0309f5f1ca52f64bb8eec405fdb38a7", } available_models = {} From f0bdb8d25f40996bc3d1e0d8869bd6161391ed47 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Thu, 19 Oct 2023 16:18:01 +0200 Subject: [PATCH 50/51] implement feedback --- .../generate_suggestions_by_file.py | 10 +++++----- .../module_programming_llm/helpers/llm_utils.py | 2 +- .../split_grading_instructions_by_file.py | 6 +++--- .../split_problem_statement_by_file.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 34fd4416b..bcd3d0666 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -24,10 +24,10 @@ class FeedbackModel(BaseModel): - title: str = Field(..., description="Very short title, i.e. feedback category", example="Logic Error") - description: str = Field(..., description="Feedback description") - line_start: Optional[int] = Field(..., description="Referenced line number start, or empty if unreferenced") - line_end: Optional[int] = Field(..., description="Referenced line number end, or empty if unreferenced") + title: str = Field(description="Very short title, i.e. 
feedback category", example="Logic Error") + description: str = Field(description="Feedback description") + line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced") + line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced") credits: float = Field(0.0, description="Number of points received/deducted") class Config: @@ -37,7 +37,7 @@ class Config: class AssessmentModel(BaseModel): """Collection of feedbacks making up an assessment""" - feedbacks: Sequence[FeedbackModel] = Field(..., description="Assessment feedbacks") + feedbacks: Sequence[FeedbackModel] = Field(description="Assessment feedbacks") class Config: title = "Assessment" diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index ab15edc60..5faad8b63 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -115,7 +115,7 @@ def get_chat_prompt_with_formatting_instructions( async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: - """Predicts and parses the output of the model + """Predicts an LLM completion using the model and parses the output using the provided Pydantic model Args: model (BaseLanguageModel): The model to predict with diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 695850ec6..a8ef00fb1 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -18,13 +18,13 @@ class FileGradingInstruction(BaseModel): - file_name: str = Field(..., description="File name") - grading_instructions: str = Field(..., description="Grading instructions relevant for this file") + file_name: str = Field(description="File name") + grading_instructions: str = Field(description="Grading instructions relevant for this file") class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - items: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + items: Sequence[FileGradingInstruction] = Field(description="File grading instructions") # pylint: disable=too-many-locals diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 7aee29e4a..a877f8c77 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -18,13 +18,13 @@ class FileProblemStatement(BaseModel): - file_name: str = Field(..., description="File name") - problem_statement: str = Field(..., description="Problem statement relevant for this file") + file_name: str = Field(description="File name") + problem_statement: str = Field(description="Problem statement relevant for this file") class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - items: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + items: Sequence[FileProblemStatement] = Field(description="File 
problem statements") # pylint: disable=too-many-locals From 70b8f407d0d891a525784d9745625dbc84b32673 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Fri, 20 Oct 2023 10:19:29 +0200 Subject: [PATCH 51/51] implement feedback --- .../module_programming_llm/generate_suggestions_by_file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index bcd3d0666..2d646026f 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -100,6 +100,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio file_filter=lambda file_path: file_path in changed_files_from_template_to_submission ) + # Gather prompt inputs for each changed file (independently) for file_path, file_content in changed_files.items(): problem_statement = ( exercise.problem_statement or "" if is_short_problem_statement @@ -150,6 +151,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio }) # Filter long prompts (omitting features if necessary) + # Lowest priority features are at the top of the list (i.e. they are omitted first if necessary) # "submission_file" is not omittable, because it is the main input containing the line numbers # In the future we might be able to include the line numbers in the diff, but for now we need to keep it omittable_features = [