From 15aae1dc3ccdc4bde38a81ff9b1da0328402d31c Mon Sep 17 00:00:00 2001
From: "Felix T.J. Dietrich"
Date: Fri, 18 Aug 2023 22:27:17 +0200
Subject: [PATCH 01/51] update dependencies

---
 module_programming_llm/poetry.lock    | 686 +++++++++++++++-----------
 module_programming_llm/pyproject.toml |   7 +-
 2 files changed, 394 insertions(+), 299 deletions(-)

diff --git a/module_programming_llm/poetry.lock b/module_programming_llm/poetry.lock
index d0015f73a..c0f838c48 100644
--- a/module_programming_llm/poetry.lock
+++ b/module_programming_llm/poetry.lock
@@ -1,9 +1,10 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
 version = "3.8.5"
 description = "Async http client/server framework (asyncio)"
+category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -112,6 +113,7 @@ speedups = ["Brotli", "aiodns", "cchardet"]
 name = "aiosignal"
 version = "1.3.1"
 description = "aiosignal: a list of registered asynchronous callbacks"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -126,6 +128,7 @@ frozenlist = ">=1.1.0"
 name = "anyio"
 version = "3.7.1"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -145,23 +148,25 @@ trio = ["trio (<0.22)"]
 
 [[package]]
 name = "async-timeout"
-version = "4.0.2"
+version = "4.0.3"
 description = "Timeout context manager for asyncio programs"
+category = "main"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"},
-    {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"},
+    {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
+    {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
 ]
 
 [[package]]
 name = "athena"
 version = "1.0.0"
 description = "This is a helper module for easier development of Athena modules. It provides communication functionality with the Assessment Module manager, as well as helper functions for storage."
+category = "main"
 optional = false
 python-versions = "^3.10"
 files = []
-develop = false
+develop = true
 
 [package.dependencies]
 fastapi = "^0.96.0"
@@ -169,7 +174,7 @@ gitpython = "^3.1.31"
 httpx = "^0.24.1"
 psycopg2 = "^2.9.6"
 sqlalchemy = {version = "^2.0.15", extras = ["mypy"]}
-uvicorn = "^0.22.0"
+uvicorn = "^0.23.0"
 
 [package.source]
 type = "directory"
@@ -179,6 +184,7 @@ url = "../athena"
 name = "attrs"
 version = "23.1.0"
 description = "Classes Without Boilerplate"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -197,6 +203,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte
 name = "certifi"
 version = "2023.7.22"
 description = "Python package for providing Mozilla's CA Bundle."
+category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -208,6 +215,7 @@ files = [
 name = "charset-normalizer"
 version = "3.2.0"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -290,13 +298,14 @@ files = [ [[package]] name = "click" -version = "8.1.6" +version = "8.1.7" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"}, - {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"}, + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, ] [package.dependencies] @@ -306,6 +315,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -317,6 +327,7 @@ files = [ name = "dataclasses-json" version = "0.5.9" description = "Easily serialize dataclasses to and from JSON" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -334,13 +345,14 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( [[package]] name = "exceptiongroup" -version = "1.1.2" +version = "1.1.3" description = "Backport of PEP 654 (exception groups)" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, - {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, ] [package.extras] @@ -350,6 +362,7 @@ test = ["pytest (>=6)"] name = "fastapi" version = "0.96.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -371,6 +384,7 @@ test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6 name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -441,6 +455,7 @@ files = [ name = "gitdb" version = "4.0.10" description = "Git Object Database" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -455,6 +470,7 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.32" description = "GitPython is a Python library used to interact with Git repositories" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -469,6 +485,7 @@ gitdb = ">=4.0.1,<5" name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -542,6 +559,7 @@ test = ["objgraph", "psutil"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" 
optional = false python-versions = ">=3.7" files = [ @@ -553,6 +571,7 @@ files = [ name = "httpcore" version = "0.17.3" description = "A minimal low-level HTTP client." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -564,16 +583,17 @@ files = [ anyio = ">=3.0,<5.0" certifi = "*" h11 = ">=0.13,<0.15" -sniffio = "==1.*" +sniffio = ">=1.0.0,<2.0.0" [package.extras] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "httpx" version = "0.24.1" description = "The next generation HTTP client." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -589,14 +609,15 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -604,65 +625,79 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "joblib" +version = "1.3.2" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, + {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, +] + [[package]] name = "langchain" -version = "0.0.225" +version = "0.0.267" description = "Building applications with LLMs through composability" +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchain-0.0.225-py3-none-any.whl", hash = "sha256:a72132ceb22f9a9fcad944778fd351d33b63e27a9c03f61d17e4e52fad37fe47"}, - {file = "langchain-0.0.225.tar.gz", hash = "sha256:986e0838faef3c344d8c00b0079814ca357a39b17f7642e06beb075d3c59c770"}, + {file = "langchain-0.0.267-py3-none-any.whl", hash = "sha256:191ab96aa6f633ecf850e944b68782e7bc237495bd91132e5ff6f9749f452f97"}, + {file = "langchain-0.0.267.tar.gz", hash = "sha256:61ee406332d9f87b71d662883f99677d39c37c6e8dbabd1c0b88335c0df43043"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">=0.5.7,<0.6.0" -langchainplus-sdk = ">=0.0.20,<0.0.21" +langsmith = ">=0.0.21,<0.1.0" numexpr = ">=2.8.4,<3.0.0" numpy = ">=1,<2" openapi-schema-pydantic = ">=1.2,<2.0" -pydantic = ">=1,<2" -PyYAML = ">=5.4.1" +pydantic = ">=1,<3" +PyYAML = ">=5.3" requests = ">=2,<3" SQLAlchemy = ">=1.4,<3" tenacity = ">=8.1.0,<9.0.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.3,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (==9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", "deeplake (>=3.6.2,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search 
(>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.1.dev3,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.9.1,<0.10.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] -azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0a20230509004)", "openai (>=0,<1)"] -clarifai = ["clarifai (==9.1.0)"] -cohere = ["cohere (>=3,<4)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", 
"psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"] +clarifai = ["clarifai (>=9.1.0)"] +cohere = ["cohere (>=4,<5)"] docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "zep-python (>=0.32)"] +extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] javascript = ["esprima (>=4.0.1,<5.0.0)"] -llms = ["anthropic (>=0.3,<0.4)", "clarifai (==9.1.0)", "cohere (>=3,<4)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers 
(>=4,<5)"] openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"] -qdrant = ["qdrant-client (>=1.1.2,<2.0.0)"] +qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] text-helpers = ["chardet (>=5.1.0,<6.0.0)"] [[package]] -name = "langchainplus-sdk" -version = "0.0.20" +name = "langsmith" +version = "0.0.24" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchainplus_sdk-0.0.20-py3-none-any.whl", hash = "sha256:07a869d476755803aa04c4986ce78d00c2fe4ff584c0eaa57d7570c9664188db"}, - {file = "langchainplus_sdk-0.0.20.tar.gz", hash = "sha256:3d300e2e3290f68cc9d842c059f9458deba60e776c9e790309688cad1bfbb219"}, + {file = "langsmith-0.0.24-py3-none-any.whl", hash = "sha256:f9f951d070aa1919123d700642aca9c781edfc8797a65ab1161aa12f89bed707"}, + {file = "langsmith-0.0.24.tar.gz", hash = "sha256:9c066dd915752324490a735692997b0db0958f5dfc1e0a0dfbf752c6e62c7529"}, ] [package.dependencies] -pydantic = ">=1,<2" +pydantic = ">=1,<3" requests = ">=2,<3" -tenacity = ">=8.1.0,<9.0.0" [[package]] name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -683,6 +718,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "marshmallow-enum" version = "1.5.1" description = "Enum field for Marshmallow" +category = "main" optional = false python-versions = "*" files = [ @@ -697,6 +733,7 @@ marshmallow = ">=2.0.0" name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -778,37 +815,39 @@ files = [ [[package]] name = "mypy" -version = "1.4.1" +version = "1.5.1" description = "Optional static typing for Python" +category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "mypy-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:566e72b0cd6598503e48ea610e0052d1b8168e60a46e0bfd34b3acf2d57f96a8"}, - {file = "mypy-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca637024ca67ab24a7fd6f65d280572c3794665eaf5edcc7e90a866544076878"}, - {file = "mypy-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dde1d180cd84f0624c5dcaaa89c89775550a675aff96b5848de78fb11adabcd"}, - {file = "mypy-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c4d8e89aa7de683e2056a581ce63c46a0c41e31bd2b6d34144e2c80f5ea53dc"}, - {file = "mypy-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:bfdca17c36ae01a21274a3c387a63aa1aafe72bff976522886869ef131b937f1"}, - {file = "mypy-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7549fbf655e5825d787bbc9ecf6028731973f78088fbca3a1f4145c39ef09462"}, - {file = "mypy-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98324ec3ecf12296e6422939e54763faedbfcc502ea4a4c38502082711867258"}, - {file = "mypy-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141dedfdbfe8a04142881ff30ce6e6653c9685b354876b12e4fe6c78598b45e2"}, - {file = "mypy-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8207b7105829eca6f3d774f64a904190bb2231de91b8b186d21ffd98005f14a7"}, - {file = "mypy-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:16f0db5b641ba159eff72cff08edc3875f2b62b2fa2bc24f68c1e7a4e8232d01"}, - {file = "mypy-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:470c969bb3f9a9efcedbadcd19a74ffb34a25f8e6b0e02dae7c0e71f8372f97b"}, - {file = "mypy-1.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5952d2d18b79f7dc25e62e014fe5a23eb1a3d2bc66318df8988a01b1a037c5b"}, - {file = "mypy-1.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:190b6bab0302cec4e9e6767d3eb66085aef2a1cc98fe04936d8a42ed2ba77bb7"}, - {file = "mypy-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9d40652cc4fe33871ad3338581dca3297ff5f2213d0df345bcfbde5162abf0c9"}, - {file = "mypy-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01fd2e9f85622d981fd9063bfaef1aed6e336eaacca00892cd2d82801ab7c042"}, - {file = "mypy-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2460a58faeea905aeb1b9b36f5065f2dc9a9c6e4c992a6499a2360c6c74ceca3"}, - {file = "mypy-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2746d69a8196698146a3dbe29104f9eb6a2a4d8a27878d92169a6c0b74435b6"}, - {file = "mypy-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ae704dcfaa180ff7c4cfbad23e74321a2b774f92ca77fd94ce1049175a21c97f"}, - {file = "mypy-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:43d24f6437925ce50139a310a64b2ab048cb2d3694c84c71c3f2a1626d8101dc"}, - {file = "mypy-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c482e1246726616088532b5e964e39765b6d1520791348e6c9dc3af25b233828"}, - {file = "mypy-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43b592511672017f5b1a483527fd2684347fdffc041c9ef53428c8dc530f79a3"}, - {file = "mypy-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34a9239d5b3502c17f07fd7c0b2ae6b7dd7d7f6af35fbb5072c6208e76295816"}, - {file = "mypy-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5703097c4936bbb9e9bce41478c8d08edd2865e177dc4c52be759f81ee4dd26c"}, - {file = "mypy-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e02d700ec8d9b1859790c0475df4e4092c7bf3272a4fd2c9f33d87fac4427b8f"}, - {file = "mypy-1.4.1-py3-none-any.whl", hash = "sha256:45d32cec14e7b97af848bddd97d85ea4f0db4d5a149ed9676caa4eb2f7402bb4"}, - {file = "mypy-1.4.1.tar.gz", hash = "sha256:9bbcd9ab8ea1f2e1c8031c21445b511442cc45c89951e49bbf852cbb70755b1b"}, + {file = "mypy-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f33592ddf9655a4894aef22d134de7393e95fcbdc2d15c1ab65828eee5c66c70"}, + {file = "mypy-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:258b22210a4a258ccd077426c7a181d789d1121aca6db73a83f79372f5569ae0"}, + {file = "mypy-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ec1f695f0c25986e6f7f8778e5ce61659063268836a38c951200c57479cc12"}, + {file = "mypy-1.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:abed92d9c8f08643c7d831300b739562b0a6c9fcb028d211134fc9ab20ccad5d"}, + {file = "mypy-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:a156e6390944c265eb56afa67c74c0636f10283429171018446b732f1a05af25"}, + {file = "mypy-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ac9c21bfe7bc9f7f1b6fae441746e6a106e48fc9de530dea29e8cd37a2c0cc4"}, + {file = "mypy-1.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51cb1323064b1099e177098cb939eab2da42fea5d818d40113957ec954fc85f4"}, + {file = "mypy-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:596fae69f2bfcb7305808c75c00f81fe2829b6236eadda536f00610ac5ec2243"}, + {file = "mypy-1.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:32cb59609b0534f0bd67faebb6e022fe534bdb0e2ecab4290d683d248be1b275"}, + {file = "mypy-1.5.1-cp311-cp311-win_amd64.whl", 
hash = "sha256:159aa9acb16086b79bbb0016145034a1a05360626046a929f84579ce1666b315"}, + {file = "mypy-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f6b0e77db9ff4fda74de7df13f30016a0a663928d669c9f2c057048ba44f09bb"}, + {file = "mypy-1.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26f71b535dfc158a71264e6dc805a9f8d2e60b67215ca0bfa26e2e1aa4d4d373"}, + {file = "mypy-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc3a600f749b1008cc75e02b6fb3d4db8dbcca2d733030fe7a3b3502902f161"}, + {file = "mypy-1.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:26fb32e4d4afa205b24bf645eddfbb36a1e17e995c5c99d6d00edb24b693406a"}, + {file = "mypy-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:82cb6193de9bbb3844bab4c7cf80e6227d5225cc7625b068a06d005d861ad5f1"}, + {file = "mypy-1.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4a465ea2ca12804d5b34bb056be3a29dc47aea5973b892d0417c6a10a40b2d65"}, + {file = "mypy-1.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9fece120dbb041771a63eb95e4896791386fe287fefb2837258925b8326d6160"}, + {file = "mypy-1.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d28ddc3e3dfeab553e743e532fb95b4e6afad51d4706dd22f28e1e5e664828d2"}, + {file = "mypy-1.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:57b10c56016adce71fba6bc6e9fd45d8083f74361f629390c556738565af8eeb"}, + {file = "mypy-1.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff0cedc84184115202475bbb46dd99f8dcb87fe24d5d0ddfc0fe6b8575c88d2f"}, + {file = "mypy-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8f772942d372c8cbac575be99f9cc9d9fb3bd95c8bc2de6c01411e2c84ebca8a"}, + {file = "mypy-1.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d627124700b92b6bbaa99f27cbe615c8ea7b3402960f6372ea7d65faf376c14"}, + {file = "mypy-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:361da43c4f5a96173220eb53340ace68cda81845cd88218f8862dfb0adc8cddb"}, + {file = "mypy-1.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:330857f9507c24de5c5724235e66858f8364a0693894342485e543f5b07c8693"}, + {file = "mypy-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:c543214ffdd422623e9fedd0869166c2f16affe4ba37463975043ef7d2ea8770"}, + {file = "mypy-1.5.1-py3-none-any.whl", hash = "sha256:f757063a83970d67c444f6e01d9550a7402322af3557ce7630d3c957386fa8f5"}, + {file = "mypy-1.5.1.tar.gz", hash = "sha256:b031b9601f1060bf1281feab89697324726ba0c0bae9d7cd7ab4b690940f0b92"}, ] [package.dependencies] @@ -819,13 +858,13 @@ typing-extensions = ">=4.1.0" [package.extras] dmypy = ["psutil (>=4.0)"] install-types = ["pip"] -python2 = ["typed-ast (>=1.4.0,<2)"] reports = ["lxml"] [[package]] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -833,43 +872,70 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + [[package]] name = "numexpr" -version = "2.8.4" +version = "2.8.5" description = "Fast numerical expression evaluator for NumPy" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "numexpr-2.8.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a75967d46b6bd56455dd32da6285e5ffabe155d0ee61eef685bbfb8dafb2e484"}, - {file = "numexpr-2.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db93cf1842f068247de631bfc8af20118bf1f9447cd929b531595a5e0efc9346"}, - {file = "numexpr-2.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bca95f4473b444428061d4cda8e59ac564dc7dc6a1dea3015af9805c6bc2946"}, - {file = "numexpr-2.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e34931089a6bafc77aaae21f37ad6594b98aa1085bb8b45d5b3cd038c3c17d9"}, - {file = "numexpr-2.8.4-cp310-cp310-win32.whl", hash = "sha256:f3a920bfac2645017110b87ddbe364c9c7a742870a4d2f6120b8786c25dc6db3"}, - {file = "numexpr-2.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:6931b1e9d4f629f43c14b21d44f3f77997298bea43790cfcdb4dd98804f90783"}, - {file = "numexpr-2.8.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9400781553541f414f82eac056f2b4c965373650df9694286b9bd7e8d413f8d8"}, - {file = "numexpr-2.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6ee9db7598dd4001138b482342b96d78110dd77cefc051ec75af3295604dde6a"}, - {file = "numexpr-2.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff5835e8af9a212e8480003d731aad1727aaea909926fd009e8ae6a1cba7f141"}, - {file = "numexpr-2.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:655d84eb09adfee3c09ecf4a89a512225da153fdb7de13c447404b7d0523a9a7"}, - {file = "numexpr-2.8.4-cp311-cp311-win32.whl", hash = "sha256:5538b30199bfc68886d2be18fcef3abd11d9271767a7a69ff3688defe782800a"}, - {file = "numexpr-2.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:3f039321d1c17962c33079987b675fb251b273dbec0f51aac0934e932446ccc3"}, - {file = "numexpr-2.8.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c867cc36cf815a3ec9122029874e00d8fbcef65035c4a5901e9b120dd5d626a2"}, - {file = "numexpr-2.8.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:059546e8f6283ccdb47c683101a890844f667fa6d56258d48ae2ecf1b3875957"}, - {file = "numexpr-2.8.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:845a6aa0ed3e2a53239b89c1ebfa8cf052d3cc6e053c72805e8153300078c0b1"}, - {file = "numexpr-2.8.4-cp37-cp37m-win32.whl", hash = 
"sha256:a38664e699526cb1687aefd9069e2b5b9387da7feac4545de446141f1ef86f46"}, - {file = "numexpr-2.8.4-cp37-cp37m-win_amd64.whl", hash = "sha256:eaec59e9bf70ff05615c34a8b8d6c7bd042bd9f55465d7b495ea5436f45319d0"}, - {file = "numexpr-2.8.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b318541bf3d8326682ebada087ba0050549a16d8b3fa260dd2585d73a83d20a7"}, - {file = "numexpr-2.8.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b076db98ca65eeaf9bd224576e3ac84c05e451c0bd85b13664b7e5f7b62e2c70"}, - {file = "numexpr-2.8.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90f12cc851240f7911a47c91aaf223dba753e98e46dff3017282e633602e76a7"}, - {file = "numexpr-2.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c368aa35ae9b18840e78b05f929d3a7b3abccdba9630a878c7db74ca2368339"}, - {file = "numexpr-2.8.4-cp38-cp38-win32.whl", hash = "sha256:b96334fc1748e9ec4f93d5fadb1044089d73fb08208fdb8382ed77c893f0be01"}, - {file = "numexpr-2.8.4-cp38-cp38-win_amd64.whl", hash = "sha256:a6d2d7740ae83ba5f3531e83afc4b626daa71df1ef903970947903345c37bd03"}, - {file = "numexpr-2.8.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:77898fdf3da6bb96aa8a4759a8231d763a75d848b2f2e5c5279dad0b243c8dfe"}, - {file = "numexpr-2.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:df35324666b693f13a016bc7957de7cc4d8801b746b81060b671bf78a52b9037"}, - {file = "numexpr-2.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ac9cfe6d0078c5fc06ba1c1bbd20b8783f28c6f475bbabd3cad53683075cab"}, - {file = "numexpr-2.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df3a1f6b24214a1ab826e9c1c99edf1686c8e307547a9aef33910d586f626d01"}, - {file = "numexpr-2.8.4-cp39-cp39-win32.whl", hash = "sha256:7d71add384adc9119568d7e9ffa8a35b195decae81e0abf54a2b7779852f0637"}, - {file = "numexpr-2.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:9f096d707290a6a00b6ffdaf581ee37331109fb7b6c8744e9ded7c779a48e517"}, - {file = "numexpr-2.8.4.tar.gz", hash = "sha256:d5432537418d18691b9115d615d6daa17ee8275baef3edf1afbbf8bc69806147"}, + {file = "numexpr-2.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51f3ab160c3847ebcca93cd88f935a7802b54a01ab63fe93152994a64d7a6cf2"}, + {file = "numexpr-2.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:de29c77f674e4eb8f0846525a475cab64008c227c8bc4ba5153ab3f72441cc63"}, + {file = "numexpr-2.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf85ba1327eb87ec82ae7936f13c8850fb969a0ca34f3ba9fa3897c09d5c80d7"}, + {file = "numexpr-2.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c00be69f747f44a631830215cab482f0f77f75af2925695adff57c1cc0f9a68"}, + {file = "numexpr-2.8.5-cp310-cp310-win32.whl", hash = "sha256:c46350dcdb93e32f033eea5a21269514ffcaf501d9abd6036992d37e48a308b0"}, + {file = "numexpr-2.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:894b027438b8ec88dea32a19193716c79f4ff8ddb92302dcc9731b51ba3565a8"}, + {file = "numexpr-2.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6df184d40d4cf9f21c71f429962f39332f7398147762588c9f3a5c77065d0c06"}, + {file = "numexpr-2.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:178b85ad373c6903e55d75787d61b92380439b70d94b001cb055a501b0821335"}, + {file = "numexpr-2.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:578fe4008e4d5d6ff01bbeb2d7b7ba1ec658a5cda9c720cd26a9a8325f8ef438"}, + {file = "numexpr-2.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ef621b4ee366a5c6a484f6678c9259f5b826569f8bfa0b89ba2306d5055468bb"}, + {file = "numexpr-2.8.5-cp311-cp311-win32.whl", hash = "sha256:dd57ab1a3d3aaa9274aff1cefbf93b8ddacc7973afef5b125905f6bf18fabab0"}, + {file = "numexpr-2.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:783324ba40eb804ecfc9ebae86120a1e339ab112d0ab8a1f0d48a26354d5bf9b"}, + {file = "numexpr-2.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:183d5430db76826e54465c69db93a3c6ecbf03cda5aa1bb96eaad0147e9b68dc"}, + {file = "numexpr-2.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39ce106f92ccea5b07b1d6f2f3c4370f05edf27691dc720a63903484a2137e48"}, + {file = "numexpr-2.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b594dc9e2d6291a0bc5c065e6d9caf3eee743b5663897832e9b17753c002947a"}, + {file = "numexpr-2.8.5-cp37-cp37m-win32.whl", hash = "sha256:62b4faf8e0627673b0210a837792bddd23050ecebc98069ab23eb0633ff1ef5f"}, + {file = "numexpr-2.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:db5c65417d69414f1ab31302ea01d3548303ef31209c38b4849d145be4e1d1ba"}, + {file = "numexpr-2.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eb36ffcfa1606e41aa08d559b4277bcad0e16b83941d1a4fee8d2bd5a34f8e0e"}, + {file = "numexpr-2.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:34af2a0e857d02a4bc5758bc037a777d50dacb13bcd57c7905268a3e44994ed6"}, + {file = "numexpr-2.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a8dad2bfaad5a5c34a2e8bbf62b9df1dfab266d345fda1feb20ff4e264b347a"}, + {file = "numexpr-2.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b93f5a866cd13a808bc3d3a9c487d94cd02eec408b275ff0aa150f2e8e5191f8"}, + {file = "numexpr-2.8.5-cp38-cp38-win32.whl", hash = "sha256:558390fea6370003ac749ed9d0f38d708aa096f5dcb707ddb6e0ca5a0dd37da1"}, + {file = "numexpr-2.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:55983806815035eb63c5039520688c49536bb7f3cc3fc1d7d64c6a00cf3f353e"}, + {file = "numexpr-2.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1510da20e6f5f45333610b1ded44c566e2690c6c437c84f2a212ca09627c7e01"}, + {file = "numexpr-2.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e8b5bf7bcb4e8dcd66522d8fc96e1db7278f901cb4fd2e155efbe62a41dde08"}, + {file = "numexpr-2.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ed0e1c1ef5f34381448539f1fe9015906d21c9cfa2797c06194d4207dadb465"}, + {file = "numexpr-2.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aea6ab45c87c0a7041183c08a798f0ad4d7c5eccbce20cfe79ce6f1a45ef3702"}, + {file = "numexpr-2.8.5-cp39-cp39-win32.whl", hash = "sha256:cbfd833ee5fdb0efb862e152aee7e6ccea9c596d5c11d22604c2e6307bff7cad"}, + {file = "numexpr-2.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:283ce8609a7ccbadf91a68f3484558b3e36d27c93c98a41ec205efb0ab43c872"}, + {file = "numexpr-2.8.5.tar.gz", hash = "sha256:45ed41e55a0abcecf3d711481e12a5fb7a904fe99d42bc282a17cc5f8ea510be"}, ] [package.dependencies] @@ -877,42 +943,44 @@ numpy = ">=1.13.3" [[package]] name = "numpy" -version = "1.25.1" +version = "1.25.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d339465dff3eb33c701430bcb9c325b60354698340229e1dff97745e6b3efa"}, - {file = "numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d736b75c3f2cb96843a5c7f8d8ccc414768d34b0a75f466c05f3a739b406f10b"}, - {file = 
"numpy-1.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a90725800caeaa160732d6b31f3f843ebd45d6b5f3eec9e8cc287e30f2805bf"}, - {file = "numpy-1.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c6c9261d21e617c6dc5eacba35cb68ec36bb72adcff0dee63f8fbc899362588"}, - {file = "numpy-1.25.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0def91f8af6ec4bb94c370e38c575855bf1d0be8a8fbfba42ef9c073faf2cf19"}, - {file = "numpy-1.25.1-cp310-cp310-win32.whl", hash = "sha256:fd67b306320dcadea700a8f79b9e671e607f8696e98ec255915c0c6d6b818503"}, - {file = "numpy-1.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:c1516db588987450b85595586605742879e50dcce923e8973f79529651545b57"}, - {file = "numpy-1.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6b82655dd8efeea69dbf85d00fca40013d7f503212bc5259056244961268b66e"}, - {file = "numpy-1.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e8f6049c4878cb16960fbbfb22105e49d13d752d4d8371b55110941fb3b17800"}, - {file = "numpy-1.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41a56b70e8139884eccb2f733c2f7378af06c82304959e174f8e7370af112e09"}, - {file = "numpy-1.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5154b1a25ec796b1aee12ac1b22f414f94752c5f94832f14d8d6c9ac40bcca6"}, - {file = "numpy-1.25.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38eb6548bb91c421261b4805dc44def9ca1a6eef6444ce35ad1669c0f1a3fc5d"}, - {file = "numpy-1.25.1-cp311-cp311-win32.whl", hash = "sha256:791f409064d0a69dd20579345d852c59822c6aa087f23b07b1b4e28ff5880fcb"}, - {file = "numpy-1.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:c40571fe966393b212689aa17e32ed905924120737194b5d5c1b20b9ed0fb171"}, - {file = "numpy-1.25.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3d7abcdd85aea3e6cdddb59af2350c7ab1ed764397f8eec97a038ad244d2d105"}, - {file = "numpy-1.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a180429394f81c7933634ae49b37b472d343cccb5bb0c4a575ac8bbc433722f"}, - {file = "numpy-1.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d412c1697c3853c6fc3cb9751b4915859c7afe6a277c2bf00acf287d56c4e625"}, - {file = "numpy-1.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20e1266411120a4f16fad8efa8e0454d21d00b8c7cee5b5ccad7565d95eb42dd"}, - {file = "numpy-1.25.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f76aebc3358ade9eacf9bc2bb8ae589863a4f911611694103af05346637df1b7"}, - {file = "numpy-1.25.1-cp39-cp39-win32.whl", hash = "sha256:247d3ffdd7775bdf191f848be8d49100495114c82c2bd134e8d5d075fb386a1c"}, - {file = "numpy-1.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:1d5d3c68e443c90b38fdf8ef40e60e2538a27548b39b12b73132456847f4b631"}, - {file = "numpy-1.25.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:35a9527c977b924042170a0887de727cd84ff179e478481404c5dc66b4170009"}, - {file = "numpy-1.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d3fe3dd0506a28493d82dc3cf254be8cd0d26f4008a417385cbf1ae95b54004"}, - {file = "numpy-1.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:012097b5b0d00a11070e8f2e261128c44157a8689f7dedcf35576e525893f4fe"}, - {file = "numpy-1.25.1.tar.gz", hash = "sha256:9a3a9f3a61480cc086117b426a8bd86869c213fc4072e606f01c4e4b66eb92bf"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = 
"numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] [[package]] name = "openai" 
version = "0.27.8" description = "Python client library for the OpenAI API" +category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -927,7 +995,7 @@ tqdm = "*" [package.extras] datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] +dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] @@ -935,6 +1003,7 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1 name = "openapi-schema-pydantic" version = "1.2.4" description = "OpenAPI (v3) specification schema as pydantic class" +category = "main" optional = false python-versions = ">=3.6.1" files = [ @@ -949,6 +1018,7 @@ pydantic = ">=1.8.2" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -956,46 +1026,32 @@ files = [ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] -[[package]] -name = "promptlayer" -version = "0.1.92" -description = "PromptLayer is a package to keep track of your GPT models training" -optional = false -python-versions = "*" -files = [ - {file = "promptlayer-0.1.92.tar.gz", hash = "sha256:81e321933b99bf8d3e52813f4d7ee047d0c8cc29956ac59fb6f4a7e6e18a9b83"}, -] - -[package.dependencies] -langchain = "*" -requests = "*" - [[package]] name = "psycopg2" -version = "2.9.6" +version = "2.9.7" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "psycopg2-2.9.6-cp310-cp310-win32.whl", hash = "sha256:f7a7a5ee78ba7dc74265ba69e010ae89dae635eea0e97b055fb641a01a31d2b1"}, - {file = "psycopg2-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:f75001a1cbbe523e00b0ef896a5a1ada2da93ccd752b7636db5a99bc57c44494"}, - {file = "psycopg2-2.9.6-cp311-cp311-win32.whl", hash = "sha256:53f4ad0a3988f983e9b49a5d9765d663bbe84f508ed655affdb810af9d0972ad"}, - {file = "psycopg2-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b81fcb9ecfc584f661b71c889edeae70bae30d3ef74fa0ca388ecda50b1222b7"}, - {file = "psycopg2-2.9.6-cp36-cp36m-win32.whl", hash = "sha256:11aca705ec888e4f4cea97289a0bf0f22a067a32614f6ef64fcf7b8bfbc53744"}, - {file = "psycopg2-2.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:36c941a767341d11549c0fbdbb2bf5be2eda4caf87f65dfcd7d146828bd27f39"}, - {file = "psycopg2-2.9.6-cp37-cp37m-win32.whl", hash = "sha256:869776630c04f335d4124f120b7fb377fe44b0a7645ab3c34b4ba42516951889"}, - {file = "psycopg2-2.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:a8ad4a47f42aa6aec8d061fdae21eaed8d864d4bb0f0cade5ad32ca16fcd6258"}, - {file = "psycopg2-2.9.6-cp38-cp38-win32.whl", hash = "sha256:2362ee4d07ac85ff0ad93e22c693d0f37ff63e28f0615a16b6635a645f4b9214"}, - {file = "psycopg2-2.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:d24ead3716a7d093b90b27b3d73459fe8cd90fd7065cf43b3c40966221d8c394"}, - {file = "psycopg2-2.9.6-cp39-cp39-win32.whl", hash = "sha256:1861a53a6a0fd248e42ea37c957d36950da00266378746588eab4f4b5649e95f"}, - {file = "psycopg2-2.9.6-cp39-cp39-win_amd64.whl", hash = 
"sha256:ded2faa2e6dfb430af7713d87ab4abbfc764d8d7fb73eafe96a24155f906ebf5"}, - {file = "psycopg2-2.9.6.tar.gz", hash = "sha256:f15158418fd826831b28585e2ab48ed8df2d0d98f502a2b4fe619e7d5ca29011"}, + {file = "psycopg2-2.9.7-cp310-cp310-win32.whl", hash = "sha256:1a6a2d609bce44f78af4556bea0c62a5e7f05c23e5ea9c599e07678995609084"}, + {file = "psycopg2-2.9.7-cp310-cp310-win_amd64.whl", hash = "sha256:b22ed9c66da2589a664e0f1ca2465c29b75aaab36fa209d4fb916025fb9119e5"}, + {file = "psycopg2-2.9.7-cp311-cp311-win32.whl", hash = "sha256:44d93a0109dfdf22fe399b419bcd7fa589d86895d3931b01fb321d74dadc68f1"}, + {file = "psycopg2-2.9.7-cp311-cp311-win_amd64.whl", hash = "sha256:91e81a8333a0037babfc9fe6d11e997a9d4dac0f38c43074886b0d9dead94fe9"}, + {file = "psycopg2-2.9.7-cp37-cp37m-win32.whl", hash = "sha256:d1210fcf99aae6f728812d1d2240afc1dc44b9e6cba526a06fb8134f969957c2"}, + {file = "psycopg2-2.9.7-cp37-cp37m-win_amd64.whl", hash = "sha256:e9b04cbef584310a1ac0f0d55bb623ca3244c87c51187645432e342de9ae81a8"}, + {file = "psycopg2-2.9.7-cp38-cp38-win32.whl", hash = "sha256:d5c5297e2fbc8068d4255f1e606bfc9291f06f91ec31b2a0d4c536210ac5c0a2"}, + {file = "psycopg2-2.9.7-cp38-cp38-win_amd64.whl", hash = "sha256:8275abf628c6dc7ec834ea63f6f3846bf33518907a2b9b693d41fd063767a866"}, + {file = "psycopg2-2.9.7-cp39-cp39-win32.whl", hash = "sha256:c7949770cafbd2f12cecc97dea410c514368908a103acf519f2a346134caa4d5"}, + {file = "psycopg2-2.9.7-cp39-cp39-win_amd64.whl", hash = "sha256:b6bd7d9d3a7a63faae6edf365f0ed0e9b0a1aaf1da3ca146e6b043fb3eb5d723"}, + {file = "psycopg2-2.9.7.tar.gz", hash = "sha256:f00cc35bd7119f1fed17b85bd1007855194dde2cbd8de01ab8ebb17487440ad8"}, ] [[package]] name = "pydantic" version = "1.10.12" description = "Data validation and settings management using python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1048,6 +1104,7 @@ email = ["email-validator (>=1.0.3)"] name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1062,6 +1119,7 @@ cli = ["click (>=5.0)"] name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1109,105 +1167,127 @@ files = [ [[package]] name = "regex" -version = "2023.6.3" +version = "2023.8.8" description = "Alternative regular expression module, to replace re." 
+category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "regex-2023.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:824bf3ac11001849aec3fa1d69abcb67aac3e150a933963fb12bda5151fe1bfd"}, - {file = "regex-2023.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:05ed27acdf4465c95826962528f9e8d41dbf9b1aa8531a387dee6ed215a3e9ef"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b49c764f88a79160fa64f9a7b425620e87c9f46095ef9c9920542ab2495c8bc"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e3f1316c2293e5469f8f09dc2d76efb6c3982d3da91ba95061a7e69489a14ef"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43e1dd9d12df9004246bacb79a0e5886b3b6071b32e41f83b0acbf293f820ee8"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4959e8bcbfda5146477d21c3a8ad81b185cd252f3d0d6e4724a5ef11c012fb06"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:af4dd387354dc83a3bff67127a124c21116feb0d2ef536805c454721c5d7993d"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2239d95d8e243658b8dbb36b12bd10c33ad6e6933a54d36ff053713f129aa536"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:890e5a11c97cf0d0c550eb661b937a1e45431ffa79803b942a057c4fb12a2da2"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a8105e9af3b029f243ab11ad47c19b566482c150c754e4c717900a798806b222"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:25be746a8ec7bc7b082783216de8e9473803706723b3f6bef34b3d0ed03d57e2"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:3676f1dd082be28b1266c93f618ee07741b704ab7b68501a173ce7d8d0d0ca18"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:10cb847aeb1728412c666ab2e2000ba6f174f25b2bdc7292e7dd71b16db07568"}, - {file = "regex-2023.6.3-cp310-cp310-win32.whl", hash = "sha256:dbbbfce33cd98f97f6bffb17801b0576e653f4fdb1d399b2ea89638bc8d08ae1"}, - {file = "regex-2023.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:c5f8037000eb21e4823aa485149f2299eb589f8d1fe4b448036d230c3f4e68e0"}, - {file = "regex-2023.6.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c123f662be8ec5ab4ea72ea300359023a5d1df095b7ead76fedcd8babbedf969"}, - {file = "regex-2023.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9edcbad1f8a407e450fbac88d89e04e0b99a08473f666a3f3de0fd292badb6aa"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcba6dae7de533c876255317c11f3abe4907ba7d9aa15d13e3d9710d4315ec0e"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29cdd471ebf9e0f2fb3cac165efedc3c58db841d83a518b082077e612d3ee5df"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12b74fbbf6cbbf9dbce20eb9b5879469e97aeeaa874145517563cca4029db65c"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c29ca1bd61b16b67be247be87390ef1d1ef702800f91fbd1991f5c4421ebae8"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:d77f09bc4b55d4bf7cc5eba785d87001d6757b7c9eec237fe2af57aba1a071d9"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ea353ecb6ab5f7e7d2f4372b1e779796ebd7b37352d290096978fea83c4dba0c"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:10590510780b7541969287512d1b43f19f965c2ece6c9b1c00fc367b29d8dce7"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e2fbd6236aae3b7f9d514312cdb58e6494ee1c76a9948adde6eba33eb1c4264f"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:6b2675068c8b56f6bfd5a2bda55b8accbb96c02fd563704732fd1c95e2083461"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74419d2b50ecb98360cfaa2974da8689cb3b45b9deff0dcf489c0d333bcc1477"}, - {file = "regex-2023.6.3-cp311-cp311-win32.whl", hash = "sha256:fb5ec16523dc573a4b277663a2b5a364e2099902d3944c9419a40ebd56a118f9"}, - {file = "regex-2023.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:09e4a1a6acc39294a36b7338819b10baceb227f7f7dbbea0506d419b5a1dd8af"}, - {file = "regex-2023.6.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0654bca0cdf28a5956c83839162692725159f4cda8d63e0911a2c0dc76166525"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:463b6a3ceb5ca952e66550a4532cef94c9a0c80dc156c4cc343041951aec1697"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87b2a5bb5e78ee0ad1de71c664d6eb536dc3947a46a69182a90f4410f5e3f7dd"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6343c6928282c1f6a9db41f5fd551662310e8774c0e5ebccb767002fcf663ca9"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6192d5af2ccd2a38877bfef086d35e6659566a335b1492786ff254c168b1693"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74390d18c75054947e4194019077e243c06fbb62e541d8817a0fa822ea310c14"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:742e19a90d9bb2f4a6cf2862b8b06dea5e09b96c9f2df1779e53432d7275331f"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8abbc5d54ea0ee80e37fef009e3cec5dafd722ed3c829126253d3e22f3846f1e"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:c2b867c17a7a7ae44c43ebbeb1b5ff406b3e8d5b3e14662683e5e66e6cc868d3"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:d831c2f8ff278179705ca59f7e8524069c1a989e716a1874d6d1aab6119d91d1"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:ee2d1a9a253b1729bb2de27d41f696ae893507c7db224436abe83ee25356f5c1"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:61474f0b41fe1a80e8dfa70f70ea1e047387b7cd01c85ec88fa44f5d7561d787"}, - {file = "regex-2023.6.3-cp36-cp36m-win32.whl", hash = "sha256:0b71e63226e393b534105fcbdd8740410dc6b0854c2bfa39bbda6b0d40e59a54"}, - {file = "regex-2023.6.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bbb02fd4462f37060122e5acacec78e49c0fbb303c30dd49c7f493cf21fc5b27"}, - {file = "regex-2023.6.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b862c2b9d5ae38a68b92e215b93f98d4c5e9454fa36aae4450f61dd33ff48487"}, - {file = 
"regex-2023.6.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:976d7a304b59ede34ca2921305b57356694f9e6879db323fd90a80f865d355a3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:83320a09188e0e6c39088355d423aa9d056ad57a0b6c6381b300ec1a04ec3d16"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9427a399501818a7564f8c90eced1e9e20709ece36be701f394ada99890ea4b3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7178bbc1b2ec40eaca599d13c092079bf529679bf0371c602edaa555e10b41c3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:837328d14cde912af625d5f303ec29f7e28cdab588674897baafaf505341f2fc"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d44dc13229905ae96dd2ae2dd7cebf824ee92bc52e8cf03dcead37d926da019"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d54af539295392611e7efbe94e827311eb8b29668e2b3f4cadcfe6f46df9c777"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7117d10690c38a622e54c432dfbbd3cbd92f09401d622902c32f6d377e2300ee"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bb60b503ec8a6e4e3e03a681072fa3a5adcbfa5479fa2d898ae2b4a8e24c4591"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:65ba8603753cec91c71de423a943ba506363b0e5c3fdb913ef8f9caa14b2c7e0"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:271f0bdba3c70b58e6f500b205d10a36fb4b58bd06ac61381b68de66442efddb"}, - {file = "regex-2023.6.3-cp37-cp37m-win32.whl", hash = "sha256:9beb322958aaca059f34975b0df135181f2e5d7a13b84d3e0e45434749cb20f7"}, - {file = "regex-2023.6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fea75c3710d4f31389eed3c02f62d0b66a9da282521075061ce875eb5300cf23"}, - {file = "regex-2023.6.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f56fcb7ff7bf7404becdfc60b1e81a6d0561807051fd2f1860b0d0348156a07"}, - {file = "regex-2023.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d2da3abc88711bce7557412310dfa50327d5769a31d1c894b58eb256459dc289"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a99b50300df5add73d307cf66abea093304a07eb017bce94f01e795090dea87c"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5708089ed5b40a7b2dc561e0c8baa9535b77771b64a8330b684823cfd5116036"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:687ea9d78a4b1cf82f8479cab23678aff723108df3edeac098e5b2498879f4a7"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3850beab9f527f06ccc94b446c864059c57651b3f911fddb8d9d3ec1d1b25d"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8915cc96abeb8983cea1df3c939e3c6e1ac778340c17732eb63bb96247b91d2"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:841d6e0e5663d4c7b4c8099c9997be748677d46cbf43f9f471150e560791f7ff"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:9edce5281f965cf135e19840f4d93d55b3835122aa76ccacfd389e880ba4cf82"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b956231ebdc45f5b7a2e1f90f66a12be9610ce775fe1b1d50414aac1e9206c06"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:36efeba71c6539d23c4643be88295ce8c82c88bbd7c65e8a24081d2ca123da3f"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:cf67ca618b4fd34aee78740bea954d7c69fdda419eb208c2c0c7060bb822d747"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b4598b1897837067a57b08147a68ac026c1e73b31ef6e36deeeb1fa60b2933c9"}, - {file = "regex-2023.6.3-cp38-cp38-win32.whl", hash = "sha256:f415f802fbcafed5dcc694c13b1292f07fe0befdb94aa8a52905bd115ff41e88"}, - {file = "regex-2023.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:d4f03bb71d482f979bda92e1427f3ec9b220e62a7dd337af0aa6b47bf4498f72"}, - {file = "regex-2023.6.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccf91346b7bd20c790310c4147eee6ed495a54ddb6737162a36ce9dbef3e4751"}, - {file = "regex-2023.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b28f5024a3a041009eb4c333863d7894d191215b39576535c6734cd88b0fcb68"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0bb18053dfcfed432cc3ac632b5e5e5c5b7e55fb3f8090e867bfd9b054dbcbf"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5bfb3004f2144a084a16ce19ca56b8ac46e6fd0651f54269fc9e230edb5e4a"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c6b48d0fa50d8f4df3daf451be7f9689c2bde1a52b1225c5926e3f54b6a9ed1"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:051da80e6eeb6e239e394ae60704d2b566aa6a7aed6f2890a7967307267a5dc6"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4c3b7fa4cdaa69268748665a1a6ff70c014d39bb69c50fda64b396c9116cf77"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:457b6cce21bee41ac292d6753d5e94dcbc5c9e3e3a834da285b0bde7aa4a11e9"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:aad51907d74fc183033ad796dd4c2e080d1adcc4fd3c0fd4fd499f30c03011cd"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0385e73da22363778ef2324950e08b689abdf0b108a7d8decb403ad7f5191938"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c6a57b742133830eec44d9b2290daf5cbe0a2f1d6acee1b3c7b1c7b2f3606df7"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3e5219bf9e75993d73ab3d25985c857c77e614525fac9ae02b1bebd92f7cecac"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e5087a3c59eef624a4591ef9eaa6e9a8d8a94c779dade95d27c0bc24650261cd"}, - {file = "regex-2023.6.3-cp39-cp39-win32.whl", hash = "sha256:20326216cc2afe69b6e98528160b225d72f85ab080cbdf0b11528cbbaba2248f"}, - {file = "regex-2023.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:bdff5eab10e59cf26bc479f565e25ed71a7d041d1ded04ccf9aee1d9f208487a"}, - {file = "regex-2023.6.3.tar.gz", hash = "sha256:72d1a25bf36d2050ceb35b517afe13864865268dfb45910e2e17a84be6cbfeb0"}, + {file = "regex-2023.8.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:88900f521c645f784260a8d346e12a1590f79e96403971241e64c3a265c8ecdb"}, + 
{file = "regex-2023.8.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3611576aff55918af2697410ff0293d6071b7e00f4b09e005d614686ac4cd57c"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0ccc8f2698f120e9e5742f4b38dc944c38744d4bdfc427616f3a163dd9de5"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c662a4cbdd6280ee56f841f14620787215a171c4e2d1744c9528bed8f5816c96"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf0633e4a1b667bfe0bb10b5e53fe0d5f34a6243ea2530eb342491f1adf4f739"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551ad543fa19e94943c5b2cebc54c73353ffff08228ee5f3376bd27b3d5b9800"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54de2619f5ea58474f2ac211ceea6b615af2d7e4306220d4f3fe690c91988a61"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ec4b3f0aebbbe2fc0134ee30a791af522a92ad9f164858805a77442d7d18570"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ae646c35cb9f820491760ac62c25b6d6b496757fda2d51be429e0e7b67ae0ab"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca339088839582d01654e6f83a637a4b8194d0960477b9769d2ff2cfa0fa36d2"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:d9b6627408021452dcd0d2cdf8da0534e19d93d070bfa8b6b4176f99711e7f90"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:bd3366aceedf274f765a3a4bc95d6cd97b130d1dda524d8f25225d14123c01db"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7aed90a72fc3654fba9bc4b7f851571dcc368120432ad68b226bd593f3f6c0b7"}, + {file = "regex-2023.8.8-cp310-cp310-win32.whl", hash = "sha256:80b80b889cb767cc47f31d2b2f3dec2db8126fbcd0cff31b3925b4dc6609dcdb"}, + {file = "regex-2023.8.8-cp310-cp310-win_amd64.whl", hash = "sha256:b82edc98d107cbc7357da7a5a695901b47d6eb0420e587256ba3ad24b80b7d0b"}, + {file = "regex-2023.8.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1e7d84d64c84ad97bf06f3c8cb5e48941f135ace28f450d86af6b6512f1c9a71"}, + {file = "regex-2023.8.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce0f9fbe7d295f9922c0424a3637b88c6c472b75eafeaff6f910494a1fa719ef"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06c57e14ac723b04458df5956cfb7e2d9caa6e9d353c0b4c7d5d54fcb1325c46"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7a9aaa5a1267125eef22cef3b63484c3241aaec6f48949b366d26c7250e0357"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b7408511fca48a82a119d78a77c2f5eb1b22fe88b0d2450ed0756d194fe7a9a"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14dc6f2d88192a67d708341f3085df6a4f5a0c7b03dec08d763ca2cd86e9f559"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48c640b99213643d141550326f34f0502fedb1798adb3c9eb79650b1ecb2f177"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:0085da0f6c6393428bf0d9c08d8b1874d805bb55e17cb1dfa5ddb7cfb11140bf"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:964b16dcc10c79a4a2be9f1273fcc2684a9eedb3906439720598029a797b46e6"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7ce606c14bb195b0e5108544b540e2c5faed6843367e4ab3deb5c6aa5e681208"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:40f029d73b10fac448c73d6eb33d57b34607f40116e9f6e9f0d32e9229b147d7"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3b8e6ea6be6d64104d8e9afc34c151926f8182f84e7ac290a93925c0db004bfd"}, + {file = "regex-2023.8.8-cp311-cp311-win32.whl", hash = "sha256:942f8b1f3b223638b02df7df79140646c03938d488fbfb771824f3d05fc083a8"}, + {file = "regex-2023.8.8-cp311-cp311-win_amd64.whl", hash = "sha256:51d8ea2a3a1a8fe4f67de21b8b93757005213e8ac3917567872f2865185fa7fb"}, + {file = "regex-2023.8.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e951d1a8e9963ea51efd7f150450803e3b95db5939f994ad3d5edac2b6f6e2b4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704f63b774218207b8ccc6c47fcef5340741e5d839d11d606f70af93ee78e4d4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22283c769a7b01c8ac355d5be0715bf6929b6267619505e289f792b01304d898"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91129ff1bb0619bc1f4ad19485718cc623a2dc433dff95baadbf89405c7f6b57"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de35342190deb7b866ad6ba5cbcccb2d22c0487ee0cbb251efef0843d705f0d4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b993b6f524d1e274a5062488a43e3f9f8764ee9745ccd8e8193df743dbe5ee61"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3026cbcf11d79095a32d9a13bbc572a458727bd5b1ca332df4a79faecd45281c"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:293352710172239bf579c90a9864d0df57340b6fd21272345222fb6371bf82b3"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d909b5a3fff619dc7e48b6b1bedc2f30ec43033ba7af32f936c10839e81b9217"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3d370ff652323c5307d9c8e4c62efd1956fb08051b0e9210212bc51168b4ff56"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:b076da1ed19dc37788f6a934c60adf97bd02c7eea461b73730513921a85d4235"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e9941a4ada58f6218694f382e43fdd256e97615db9da135e77359da257a7168b"}, + {file = "regex-2023.8.8-cp36-cp36m-win32.whl", hash = "sha256:a8c65c17aed7e15a0c824cdc63a6b104dfc530f6fa8cb6ac51c437af52b481c7"}, + {file = "regex-2023.8.8-cp36-cp36m-win_amd64.whl", hash = "sha256:aadf28046e77a72f30dcc1ab185639e8de7f4104b8cb5c6dfa5d8ed860e57236"}, + {file = "regex-2023.8.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:423adfa872b4908843ac3e7a30f957f5d5282944b81ca0a3b8a7ccbbfaa06103"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ae594c66f4a7e1ea67232a0846649a7c94c188d6c071ac0210c3e86a5f92109"}, + {file = 
"regex-2023.8.8-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e51c80c168074faa793685656c38eb7a06cbad7774c8cbc3ea05552d615393d8"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09b7f4c66aa9d1522b06e31a54f15581c37286237208df1345108fcf4e050c18"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e73e5243af12d9cd6a9d6a45a43570dbe2e5b1cdfc862f5ae2b031e44dd95a8"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:941460db8fe3bd613db52f05259c9336f5a47ccae7d7def44cc277184030a116"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f0ccf3e01afeb412a1a9993049cb160d0352dba635bbca7762b2dc722aa5742a"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e9216e0d2cdce7dbc9be48cb3eacb962740a09b011a116fd7af8c832ab116ca"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:5cd9cd7170459b9223c5e592ac036e0704bee765706445c353d96f2890e816c8"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4873ef92e03a4309b3ccd8281454801b291b689f6ad45ef8c3658b6fa761d7ac"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:239c3c2a339d3b3ddd51c2daef10874410917cd2b998f043c13e2084cb191684"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1005c60ed7037be0d9dea1f9c53cc42f836188227366370867222bda4c3c6bd7"}, + {file = "regex-2023.8.8-cp37-cp37m-win32.whl", hash = "sha256:e6bd1e9b95bc5614a7a9c9c44fde9539cba1c823b43a9f7bc11266446dd568e3"}, + {file = "regex-2023.8.8-cp37-cp37m-win_amd64.whl", hash = "sha256:9a96edd79661e93327cfeac4edec72a4046e14550a1d22aa0dd2e3ca52aec921"}, + {file = "regex-2023.8.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f2181c20ef18747d5f4a7ea513e09ea03bdd50884a11ce46066bb90fe4213675"}, + {file = "regex-2023.8.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2ad5add903eb7cdde2b7c64aaca405f3957ab34f16594d2b78d53b8b1a6a7d6"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9233ac249b354c54146e392e8a451e465dd2d967fc773690811d3a8c240ac601"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920974009fb37b20d32afcdf0227a2e707eb83fe418713f7a8b7de038b870d0b"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2b6c5dfe0929b6c23dde9624483380b170b6e34ed79054ad131b20203a1a63"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96979d753b1dc3b2169003e1854dc67bfc86edf93c01e84757927f810b8c3c93"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ae54a338191e1356253e7883d9d19f8679b6143703086245fb14d1f20196be9"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2162ae2eb8b079622176a81b65d486ba50b888271302190870b8cc488587d280"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c884d1a59e69e03b93cf0dfee8794c63d7de0ee8f7ffb76e5f75be8131b6400a"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:cf9273e96f3ee2ac89ffcb17627a78f78e7516b08f94dc435844ae72576a276e"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:83215147121e15d5f3a45d99abeed9cf1fe16869d5c233b08c56cdf75f43a504"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3f7454aa427b8ab9101f3787eb178057c5250478e39b99540cfc2b889c7d0586"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0640913d2c1044d97e30d7c41728195fc37e54d190c5385eacb52115127b882"}, + {file = "regex-2023.8.8-cp38-cp38-win32.whl", hash = "sha256:0c59122ceccb905a941fb23b087b8eafc5290bf983ebcb14d2301febcbe199c7"}, + {file = "regex-2023.8.8-cp38-cp38-win_amd64.whl", hash = "sha256:c12f6f67495ea05c3d542d119d270007090bad5b843f642d418eb601ec0fa7be"}, + {file = "regex-2023.8.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:82cd0a69cd28f6cc3789cc6adeb1027f79526b1ab50b1f6062bbc3a0ccb2dbc3"}, + {file = "regex-2023.8.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bb34d1605f96a245fc39790a117ac1bac8de84ab7691637b26ab2c5efb8f228c"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:987b9ac04d0b38ef4f89fbc035e84a7efad9cdd5f1e29024f9289182c8d99e09"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dd6082f4e2aec9b6a0927202c85bc1b09dcab113f97265127c1dc20e2e32495"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7eb95fe8222932c10d4436e7a6f7c99991e3fdd9f36c949eff16a69246dee2dc"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7098c524ba9f20717a56a8d551d2ed491ea89cbf37e540759ed3b776a4f8d6eb"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b694430b3f00eb02c594ff5a16db30e054c1b9589a043fe9174584c6efa8033"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2aeab3895d778155054abea5238d0eb9a72e9242bd4b43f42fd911ef9a13470"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:988631b9d78b546e284478c2ec15c8a85960e262e247b35ca5eaf7ee22f6050a"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:67ecd894e56a0c6108ec5ab1d8fa8418ec0cff45844a855966b875d1039a2e34"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:14898830f0a0eb67cae2bbbc787c1a7d6e34ecc06fbd39d3af5fe29a4468e2c9"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f2200e00b62568cfd920127782c61bc1c546062a879cdc741cfcc6976668dfcf"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9691a549c19c22d26a4f3b948071e93517bdf86e41b81d8c6ac8a964bb71e5a6"}, + {file = "regex-2023.8.8-cp39-cp39-win32.whl", hash = "sha256:6ab2ed84bf0137927846b37e882745a827458689eb969028af8032b1b3dac78e"}, + {file = "regex-2023.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5543c055d8ec7801901e1193a51570643d6a6ab8751b1f7dd9af71af467538bb"}, + {file = "regex-2023.8.8.tar.gz", hash = "sha256:fcbdc5f2b0f1cd0f6a56cdb46fe41d2cce1e644e3b68832f3eeebc5fb0f7712e"}, ] +[[package]] +name = "replicate" +version = "0.11.0" +description = "Python client for Replicate" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "replicate-0.11.0-py3-none-any.whl", hash = 
"sha256:fbb8815068864dc822cd4fa7b6103d6f4089d6ef122abd6c3441ca0f0f110c46"}, + {file = "replicate-0.11.0.tar.gz", hash = "sha256:4d54b5838c1552a6f76cc37c3af8d9a7998105382082d672acad31636ad443b5"}, +] + +[package.dependencies] +packaging = "*" +pydantic = ">1" +requests = ">2" + +[package.extras] +dev = ["black", "mypy", "pytest", "responses", "ruff"] + [[package]] name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1229,6 +1309,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1240,6 +1321,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1249,61 +1331,62 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.19" +version = "2.0.20" description = "Database Abstraction Library" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9deaae357edc2091a9ed5d25e9ee8bba98bcfae454b3911adeaf159c2e9ca9e3"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0bf0fd65b50a330261ec7fe3d091dfc1c577483c96a9fa1e4323e932961aa1b5"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d90ccc15ba1baa345796a8fb1965223ca7ded2d235ccbef80a47b85cea2d71a"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb4e688f6784427e5f9479d1a13617f573de8f7d4aa713ba82813bcd16e259d1"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:584f66e5e1979a7a00f4935015840be627e31ca29ad13f49a6e51e97a3fb8cae"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c69ce70047b801d2aba3e5ff3cba32014558966109fecab0c39d16c18510f15"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-win32.whl", hash = "sha256:96f0463573469579d32ad0c91929548d78314ef95c210a8115346271beeeaaa2"}, - {file = "SQLAlchemy-2.0.19-cp310-cp310-win_amd64.whl", hash = "sha256:22bafb1da60c24514c141a7ff852b52f9f573fb933b1e6b5263f0daa28ce6db9"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d6894708eeb81f6d8193e996257223b6bb4041cb05a17cd5cf373ed836ef87a2"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8f2afd1aafded7362b397581772c670f20ea84d0a780b93a1a1529da7c3d369"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15afbf5aa76f2241184c1d3b61af1a72ba31ce4161013d7cb5c4c2fca04fd6e"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fc05b59142445a4efb9c1fd75c334b431d35c304b0e33f4fa0ff1ea4890f92e"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5831138f0cc06b43edf5f99541c64adf0ab0d41f9a4471fd63b54ae18399e4de"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3afa8a21a9046917b3a12ffe016ba7ebe7a55a6fc0c7d950beb303c735c3c3ad"}, - {file = "SQLAlchemy-2.0.19-cp311-cp311-win32.whl", hash = "sha256:c896d4e6ab2eba2afa1d56be3d0b936c56d4666e789bfc59d6ae76e9fcf46145"}, - {file = 
"SQLAlchemy-2.0.19-cp311-cp311-win_amd64.whl", hash = "sha256:024d2f67fb3ec697555e48caeb7147cfe2c08065a4f1a52d93c3d44fc8e6ad1c"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:89bc2b374ebee1a02fd2eae6fd0570b5ad897ee514e0f84c5c137c942772aa0c"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd4d410a76c3762511ae075d50f379ae09551d92525aa5bb307f8343bf7c2c12"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f469f15068cd8351826df4080ffe4cc6377c5bf7d29b5a07b0e717dddb4c7ea2"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:cda283700c984e699e8ef0fcc5c61f00c9d14b6f65a4f2767c97242513fcdd84"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:43699eb3f80920cc39a380c159ae21c8a8924fe071bccb68fc509e099420b148"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-win32.whl", hash = "sha256:61ada5831db36d897e28eb95f0f81814525e0d7927fb51145526c4e63174920b"}, - {file = "SQLAlchemy-2.0.19-cp37-cp37m-win_amd64.whl", hash = "sha256:57d100a421d9ab4874f51285c059003292433c648df6abe6c9c904e5bd5b0828"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:16a310f5bc75a5b2ce7cb656d0e76eb13440b8354f927ff15cbaddd2523ee2d1"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cf7b5e3856cbf1876da4e9d9715546fa26b6e0ba1a682d5ed2fc3ca4c7c3ec5b"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e7b69d9ced4b53310a87117824b23c509c6fc1f692aa7272d47561347e133b6"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9eb4575bfa5afc4b066528302bf12083da3175f71b64a43a7c0badda2be365"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6b54d1ad7a162857bb7c8ef689049c7cd9eae2f38864fc096d62ae10bc100c7d"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5d6afc41ca0ecf373366fd8e10aee2797128d3ae45eb8467b19da4899bcd1ee0"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-win32.whl", hash = "sha256:430614f18443b58ceb9dedec323ecddc0abb2b34e79d03503b5a7579cd73a531"}, - {file = "SQLAlchemy-2.0.19-cp38-cp38-win_amd64.whl", hash = "sha256:eb60699de43ba1a1f77363f563bb2c652f7748127ba3a774f7cf2c7804aa0d3d"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a752b7a9aceb0ba173955d4f780c64ee15a1a991f1c52d307d6215c6c73b3a4c"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7351c05db355da112e056a7b731253cbeffab9dfdb3be1e895368513c7d70106"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa51ce4aea583b0c6b426f4b0563d3535c1c75986c4373a0987d84d22376585b"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae7473a67cd82a41decfea58c0eac581209a0aa30f8bc9190926fbf628bb17f7"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:851a37898a8a39783aab603c7348eb5b20d83c76a14766a43f56e6ad422d1ec8"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:539010665c90e60c4a1650afe4ab49ca100c74e6aef882466f1de6471d414be7"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-win32.whl", hash = "sha256:f82c310ddf97b04e1392c33cf9a70909e0ae10a7e2ddc1d64495e3abdc5d19fb"}, - {file = "SQLAlchemy-2.0.19-cp39-cp39-win_amd64.whl", hash = 
"sha256:8e712cfd2e07b801bc6b60fdf64853bc2bd0af33ca8fa46166a23fe11ce0dbb0"}, - {file = "SQLAlchemy-2.0.19-py3-none-any.whl", hash = "sha256:314145c1389b021a9ad5aa3a18bac6f5d939f9087d7fc5443be28cba19d2c972"}, - {file = "SQLAlchemy-2.0.19.tar.gz", hash = "sha256:77a14fa20264af73ddcdb1e2b9c5a829b8cc6b8304d0f093271980e36c200a3f"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759b51346aa388c2e606ee206c0bc6f15a5299f6174d1e10cadbe4530d3c7a98"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1506e988ebeaaf316f183da601f24eedd7452e163010ea63dbe52dc91c7fc70e"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5768c268df78bacbde166b48be788b83dddaa2a5974b8810af422ddfe68a9bc8"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f0dd6d15b6dc8b28a838a5c48ced7455c3e1fb47b89da9c79cc2090b072a50"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:243d0fb261f80a26774829bc2cee71df3222587ac789b7eaf6555c5b15651eed"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6eb6d77c31e1bf4268b4d61b549c341cbff9842f8e115ba6904249c20cb78a61"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-win32.whl", hash = "sha256:bcb04441f370cbe6e37c2b8d79e4af9e4789f626c595899d94abebe8b38f9a4d"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-win_amd64.whl", hash = "sha256:d32b5ffef6c5bcb452723a496bad2d4c52b346240c59b3e6dba279f6dcc06c14"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dd81466bdbc82b060c3c110b2937ab65ace41dfa7b18681fdfad2f37f27acdd7"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fe7d61dc71119e21ddb0094ee994418c12f68c61b3d263ebaae50ea8399c4d4"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4e571af672e1bb710b3cc1a9794b55bce1eae5aed41a608c0401885e3491179"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3364b7066b3c7f4437dd345d47271f1251e0cfb0aba67e785343cdbdb0fff08c"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1be86ccea0c965a1e8cd6ccf6884b924c319fcc85765f16c69f1ae7148eba64b"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1d35d49a972649b5080557c603110620a86aa11db350d7a7cb0f0a3f611948a0"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-win32.whl", hash = "sha256:27d554ef5d12501898d88d255c54eef8414576f34672e02fe96d75908993cf53"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-win_amd64.whl", hash = "sha256:411e7f140200c02c4b953b3dbd08351c9f9818d2bd591b56d0fa0716bd014f1e"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3c6aceebbc47db04f2d779db03afeaa2c73ea3f8dcd3987eb9efdb987ffa09a3"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d3f175410a6db0ad96b10bfbb0a5530ecd4fcf1e2b5d83d968dd64791f810ed"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea8186be85da6587456c9ddc7bf480ebad1a0e6dcbad3967c4821233a4d4df57"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c3d99ba99007dab8233f635c32b5cd24fb1df8d64e17bc7df136cedbea427897"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:76fdfc0f6f5341987474ff48e7a66c3cd2b8a71ddda01fa82fedb180b961630a"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-win32.whl", hash = "sha256:d3793dcf5bc4d74ae1e9db15121250c2da476e1af8e45a1d9a52b1513a393459"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-win_amd64.whl", hash = "sha256:79fde625a0a55220d3624e64101ed68a059c1c1f126c74f08a42097a72ff66a9"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:599ccd23a7146e126be1c7632d1d47847fa9f333104d03325c4e15440fc7d927"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1a58052b5a93425f656675673ef1f7e005a3b72e3f2c91b8acca1b27ccadf5f4"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79543f945be7a5ada9943d555cf9b1531cfea49241809dd1183701f94a748624"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63e73da7fb030ae0a46a9ffbeef7e892f5def4baf8064786d040d45c1d6d1dc5"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3ce5e81b800a8afc870bb8e0a275d81957e16f8c4b62415a7b386f29a0cb9763"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb0d3e94c2a84215532d9bcf10229476ffd3b08f481c53754113b794afb62d14"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-win32.whl", hash = "sha256:8dd77fd6648b677d7742d2c3cc105a66e2681cc5e5fb247b88c7a7b78351cf74"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-win_amd64.whl", hash = "sha256:6f8a934f9dfdf762c844e5164046a9cea25fabbc9ec865c023fe7f300f11ca4a"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:26a3399eaf65e9ab2690c07bd5cf898b639e76903e0abad096cd609233ce5208"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4cde2e1096cbb3e62002efdb7050113aa5f01718035ba9f29f9d89c3758e7e4e"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b09ba72e4e6d341bb5bdd3564f1cea6095d4c3632e45dc69375a1dbe4e26ec"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b74eeafaa11372627ce94e4dc88a6751b2b4d263015b3523e2b1e57291102f0"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:77d37c1b4e64c926fa3de23e8244b964aab92963d0f74d98cbc0783a9e04f501"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:eefebcc5c555803065128401a1e224a64607259b5eb907021bf9b175f315d2a6"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-win32.whl", hash = "sha256:3423dc2a3b94125094897118b52bdf4d37daf142cbcf26d48af284b763ab90e9"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-win_amd64.whl", hash = "sha256:5ed61e3463021763b853628aef8bc5d469fe12d95f82c74ef605049d810f3267"}, + {file = "SQLAlchemy-2.0.20-py3-none-any.whl", hash = "sha256:63a368231c53c93e2b67d0c5556a9836fdcd383f7e3026a39602aad775b14acf"}, + {file = "SQLAlchemy-2.0.20.tar.gz", hash = "sha256:ca8a5ff2aa7f3ade6c498aaafce25b1eaeabe4e42b73e25519183e4566a16fc6"}, ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or 
platform_machine == \"win32\" or platform_machine == \"WIN32\""} mypy = {version = ">=0.910", optional = true, markers = "extra == \"mypy\""} typing-extensions = ">=4.2.0" [package.extras] -aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] asyncio = ["greenlet (!=0.4.17)"] asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] @@ -1330,6 +1413,7 @@ sqlcipher = ["sqlcipher3-binary"] name = "starlette" version = "0.27.0" description = "The little ASGI library that shines." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1345,13 +1429,14 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam [[package]] name = "tenacity" -version = "8.2.2" +version = "8.2.3" description = "Retry code until it succeeds" +category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "tenacity-8.2.2-py3-none-any.whl", hash = "sha256:2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0"}, - {file = "tenacity-8.2.2.tar.gz", hash = "sha256:43af037822bd0029025877f3b2d97cc4d7bb0c2991000a3d59d71517c5c969e0"}, + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, ] [package.extras] @@ -1361,6 +1446,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "tiktoken" version = "0.4.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1406,6 +1492,7 @@ blobfile = ["blobfile (>=2)"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1415,20 +1502,21 @@ files = [ [[package]] name = "tqdm" -version = "4.65.0" +version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, - {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] @@ -1437,6 +1525,7 @@ telegram = ["requests"] name = "typing-extensions" version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1448,6 +1537,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
+category = "main" optional = false python-versions = "*" files = [ @@ -1463,6 +1553,7 @@ typing-extensions = ">=3.7.4" name = "urllib3" version = "2.0.4" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1478,18 +1569,20 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.22.0" +version = "0.23.2" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "uvicorn-0.22.0-py3-none-any.whl", hash = "sha256:e9434d3bbf05f310e762147f769c9f21235ee118ba2d2bf1155a7196448bd996"}, - {file = "uvicorn-0.22.0.tar.gz", hash = "sha256:79277ae03db57ce7d9aa0567830bbb51d7a612f54d6e1e3e92da3ef24c2c8ed8"}, + {file = "uvicorn-0.23.2-py3-none-any.whl", hash = "sha256:1f9be6558f01239d4fdf22ef8126c39cb1ad0addf76c40e760549d2c2f43ab53"}, + {file = "uvicorn-0.23.2.tar.gz", hash = "sha256:4d3cc12d7727ba72b64d12d3cc7743124074c0a69f7b201512fc50c3e3f1569a"}, ] [package.dependencies] click = ">=7.0" h11 = ">=0.8" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] @@ -1498,6 +1591,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "yarl" version = "1.9.2" description = "Yet another URL library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1584,4 +1678,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "856689b2b909f48940783c8da91c4774528e50a3fc0dc08e425f570827a83b09" +content-hash = "87a7b3b9660b3adad9c83f2095febfcb80d6bf6b75c30be1a3f29b1716da9aa2" diff --git a/module_programming_llm/pyproject.toml b/module_programming_llm/pyproject.toml index 43fec6321..425b47051 100644 --- a/module_programming_llm/pyproject.toml +++ b/module_programming_llm/pyproject.toml @@ -9,11 +9,12 @@ license = "MIT" python = "^3.10" athena = {path = "../athena", develop = true} openai = "^0.27.8" -langchain = "0.0.225" +langchain = "^0.0.267" +python-dotenv = "^1.0.0" +nltk = "^3.8.1" gitpython = "^3.1.31" +replicate = "^0.11.0" tiktoken = "^0.4.0" -promptlayer = "^0.1.85" -python-dotenv = "^1.0.0" [tool.poetry.scripts] module = "athena:run_module" From 5eb465d76e5c41b202f46a8f6099942c1a14d64d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 16:10:08 +0200 Subject: [PATCH 02/51] add some changes --- env_example/module_programming_llm.env | 35 +- module_programming_llm/.env.example | 34 +- .../module_programming_llm/__main__.py | 35 +- .../basic/basic_feedback_provider.py | 182 +++--- .../basic/file_instructions.py | 59 -- .../basic/prompts/basic_feedback_provider.py | 40 -- .../generate_file_grading_instructions.py | 15 - .../generate_file_problem_statements.py | 15 - .../module_programming_llm/config.py | 69 +++ .../generate_suggestions_by_file.py | 179 ++++++ .../helpers/llm_utils.py | 152 +++++ .../module_programming_llm/helpers/models.py | 36 -- .../helpers/models/__init__.py | 36 ++ .../helpers/models/model_config.py | 10 + .../helpers/models/openai.py | 321 +++++++++++ .../helpers/models/replicate.py | 110 ++++ .../module_programming_llm/helpers/utils.py | 2 +- .../prompts/generate_suggestions_by_file.py | 27 + .../split_grading_instructions_by_file.py | 13 + .../split_problem_statement_by_file.py | 13 + .../split_grading_instructions_by_file.py | 95 +++ .../split_problem_statement_by_file.py | 95 +++ module_programming_llm/poetry.lock | 542 +++++++++++++++++- module_programming_llm/pyproject.toml | 3 + 24 files changed, 1827 insertions(+), 291 deletions(-) delete mode 100644 module_programming_llm/module_programming_llm/basic/file_instructions.py delete mode 100644 module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py delete mode 100644 module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py delete mode 100644 module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py create mode 100644 module_programming_llm/module_programming_llm/config.py create mode 100644 module_programming_llm/module_programming_llm/generate_suggestions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/helpers/llm_utils.py delete mode 100644 module_programming_llm/module_programming_llm/helpers/models.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/__init__.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/model_config.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/openai.py create mode 100644 module_programming_llm/module_programming_llm/helpers/models/replicate.py create mode 100644 module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py create mode 100644 module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py create mode 100644 module_programming_llm/module_programming_llm/split_problem_statement_by_file.py diff --git a/env_example/module_programming_llm.env b/env_example/module_programming_llm.env index a4d7be5f5..0df776702 100644 --- a/env_example/module_programming_llm.env +++ b/env_example/module_programming_llm.env @@ -2,10 +2,33 @@ PRODUCTION=1 SECRET=12345abcdef DATABASE_URL=postgresql://postgres:password@postgres:5432/athena -OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -# Can be empty if Azure API is not used -OPENAI_API_TYPE="azure" # change to "azure" if Azure is used -OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed -OPENAI_API_VERSION="2023-03-15-preview" # 
change version if needed -AZURE_DEPLOYMENT_NAME="gpt-35" # change to deployment name \ No newline at end of file +################################################################ +# LLM Credentials # +################################################################ + +# Default model to use +# See below for options, available models are also logged on startup +LLM_DEFAULT_MODEL="azure_openai_gpt-35" + +# Standard OpenAI (Non-Azure) [leave blank if not used] +# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003` +# A list of models can be found in `module_programming_llm/helpers/models/openai.py` (openai_models) +LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Azure OpenAI [leave blank if not used] +# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35` +LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed +LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed + +# Replicate [leave blank if not used] +# See https://replicate.com and adjust model config options in `module_programming_llm/helpers/models/replicate.py` +REPLICATE_API_TOKEN= + +# LangSmith (can be used for tracing LLMs) [leave blank if not used] +# See https://docs.smith.langchain.com +# LANGCHAIN_TRACING_V2=true +# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +# LANGCHAIN_API_KEY="XXX" +# LANGCHAIN_PROJECT="XXX" \ No newline at end of file diff --git a/module_programming_llm/.env.example b/module_programming_llm/.env.example index f7b7f048b..2b6d9a889 100644 --- a/module_programming_llm/.env.example +++ b/module_programming_llm/.env.example @@ -6,10 +6,32 @@ SECRET=12345abcdef DATABASE_URL=sqlite:///../data/data.sqlite -OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +################################################################ +# LLM Credentials # +################################################################ -# Can be empty if Azure API is not used -OPENAI_API_TYPE="azure" # change to "azure" if Azure is used -OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed -OPENAI_API_VERSION="2023-03-15-preview" # change version if needed -AZURE_DEPLOYMENT_NAME="gpt-35" # change to deployment name \ No newline at end of file +# Default model to use +# See below for options, available models are also logged on startup +LLM_DEFAULT_MODEL="azure_openai_gpt-35" + +# Standard OpenAI (Non-Azure) [leave blank if not used] +# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003` +# A list of models can be found in `module_programming_llm/helpers/models/openai.py` (openai_models) +LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Azure OpenAI [leave blank if not used] +# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. 
`azure_openai_gpt-35` +LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed +LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed + +# Replicate [leave blank if not used] +# See https://replicate.com and adjust model config options in `module_programming_llm/helpers/models/replicate.py` +REPLICATE_API_TOKEN= + +# LangSmith (can be used for tracing LLMs) [leave blank if not used] +# See https://docs.smith.langchain.com +# LANGCHAIN_TRACING_V2=true +# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +# LANGCHAIN_API_KEY="XXX" +# LANGCHAIN_PROJECT="XXX" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index ca08b7a04..e4fb070fe 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -1,50 +1,43 @@ from typing import List +import tiktoken + from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider -from athena.storage import store_exercise from athena.programming import Exercise, Submission, Feedback from athena.logger import logger +from module_programming_llm.config import Configuration -from module_programming_llm.basic.basic_feedback_provider import suggest_feedback as suggest_feedback_basic -from module_programming_llm.basic.file_instructions import generate_file_grading_instructions, generate_file_problem_statements +from module_programming_llm.generate_suggestions_by_file import generate_suggestions_by_file +from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed +from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed @submissions_consumer -def receive_submissions(exercise: Exercise, submissions: List[Submission]): +def receive_submissions(exercise: Exercise, submissions: List[Submission], module_config: Configuration): logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id) - # Split problem statements and grading instructions - exercise.meta['file_grading_instructions'] = generate_file_grading_instructions(exercise) - exercise.meta['file_problem_statements'] = generate_file_problem_statements(exercise) - - store_exercise(exercise) + # Split problem statements and grading instructions for later + generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) + generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) @submission_selector def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission: logger.info("select_submission: Received %d, submissions for exercise %d", len(submissions), exercise.id) - # Always return the first submission return submissions[0] @feedback_consumer def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]): - logger.info("process_feedback: Received feedbacks for submission %d of exercise %d.", submission.id, exercise.id) - logger.info("process_feedback: Feedbacks: %s", feedbacks) - # Do something with the feedback + logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", 
len(feedbacks), submission.id, exercise.id) @feedback_provider -async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: +async def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]: logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id, exercise.id) - # Do something with the submission and return a list of feedback - - # Check if file based grading instructions and problem statements are available - if 'file_grading_instructions' in exercise.meta and 'file_problem_statements' in exercise.meta: - return await suggest_feedback_basic(exercise, submission) - logger.info("suggest_feedback: No file based grading instructions and problem statements available. Skipping feedback generation.") - return [] + return await generate_suggestions_by_file(exercise, submission, module_config.approach, module_config.debug) if __name__ == "__main__": + tiktoken.get_encoding("cl100k_base") app.start() diff --git a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py index 33fc8564c..134fd0e41 100644 --- a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py +++ b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py @@ -1,110 +1,110 @@ -import json -from typing import List +# import json +# from typing import List -from langchain.chains import LLMChain -from langchain.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) +# from langchain.chains import LLMChain +# from langchain.prompts import ( +# ChatPromptTemplate, +# SystemMessagePromptTemplate, +# HumanMessagePromptTemplate, +# ) -from athena.programming import Exercise, Submission, Feedback -from athena.logger import logger +# from athena.programming import Exercise, Submission, Feedback +# from athena.logger import logger -from module_programming_llm.helpers.utils import get_diff, get_file_extension, load_files_from_repo, add_line_numbers -from module_programming_llm.helpers.models import chat +# from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers +# from module_programming_llm.helpers.models import chat -from .prompts.basic_feedback_provider import system_template, human_template +# from ..prompts.basic_feedback_provider import system_template, human_template -async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: - max_prompt_length = 2560 - input_list: List[dict] = [] +# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: +# max_prompt_length = 2560 +# input_list: List[dict] = [] - if exercise.meta['file_grading_instructions'] is None: - raise ValueError("No file grading instructions found for exercise in meta.") - if exercise.meta['file_problem_statements'] is None: - raise ValueError("No file problem statements found for exercise in meta.") +# if exercise.meta['file_grading_instructions'] is None: +# raise ValueError("No file grading instructions found for exercise in meta.") +# if exercise.meta['file_problem_statements'] is None: +# raise ValueError("No file problem statements found for exercise in meta.") - # Feature extraction - solution_repo = exercise.get_solution_repository() - template_repo = exercise.get_template_repository() - 
submission_repo = submission.get_repository() +# # Feature extraction +# solution_repo = exercise.get_solution_repository() +# template_repo = exercise.get_template_repository() +# submission_repo = submission.get_repository() - file_extension = get_file_extension(exercise.programming_language) - if file_extension is None: - raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") +# file_extension = get_programming_language_file_extension(exercise.programming_language) +# if file_extension is None: +# raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): - if submission_content is None: - continue +# for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): +# if submission_content is None: +# continue - problem_statement = exercise.meta['file_problem_statements'].get(file_path) - if problem_statement is None: - logger.info("No problem statement for %s, skipping.", file_path) - continue +# problem_statement = exercise.meta['file_problem_statements'].get(file_path) +# if problem_statement is None: +# logger.info("No problem statement for %s, skipping.", file_path) +# continue - grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) - if grading_instructions is None: - logger.info("No grading instructions for %s, skipping.", file_path) - continue +# grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) +# if grading_instructions is None: +# logger.info("No grading instructions for %s, skipping.", file_path) +# continue - submission_content = add_line_numbers(submission_content) - solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) - template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) +# submission_content = add_line_numbers(submission_content) +# solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) +# template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) - input_list.append({ - "file_path": file_path, - "submission_content": submission_content, - "solution_to_submission_diff": solution_to_submission_diff, - "template_to_submission_diff": template_to_submission_diff, - "grading_instructions": grading_instructions, - "problem_statement": problem_statement, - }) +# input_list.append({ +# "file_path": file_path, +# "submission_content": submission_content, +# "solution_to_submission_diff": solution_to_submission_diff, +# "template_to_submission_diff": template_to_submission_diff, +# "grading_instructions": grading_instructions, +# "problem_statement": problem_statement, +# }) - system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) - human_message_prompt = HumanMessagePromptTemplate.from_template(human_template) - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) +# 
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) +# human_message_prompt = HumanMessagePromptTemplate.from_template(human_template) +# chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - # Filter long prompts - input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] +# # Filter long prompts +# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - # Completion - chain = LLMChain(llm=chat, prompt=chat_prompt) - if not input_list: - return [] - result = await chain.agenerate(input_list) +# # Completion +# chain = LLMChain(llm=chat, prompt=chat_prompt) +# if not input_list: +# return [] +# result = await chain.agenerate(input_list) - # Parse result - feedback_proposals: List[Feedback] = [] - for input, generations in zip(input_list, result.generations): - file_path = input["file_path"] - for generation in generations: - try: - feedbacks = json.loads(generation.text) - except json.JSONDecodeError: - logger.error("Failed to parse feedback json: %s", generation.text) - continue - if not isinstance(feedbacks, list): - logger.error("Feedback json is not a list: %s", generation.text) - continue +# # Parse result +# feedback_proposals: List[Feedback] = [] +# for input, generations in zip(input_list, result.generations): +# file_path = input["file_path"] +# for generation in generations: +# try: +# feedbacks = json.loads(generation.text) +# except json.JSONDecodeError: +# logger.error("Failed to parse feedback json: %s", generation.text) +# continue +# if not isinstance(feedbacks, list): +# logger.error("Feedback json is not a list: %s", generation.text) +# continue - for feedback in feedbacks: - line = feedback.get("line", None) - description = feedback.get("text", None) - credits = feedback.get("credits", 0.0) - feedback_proposals.append( - Feedback( - id=None, - exercise_id=exercise.id, - submission_id=submission.id, - title="Feedback", - description=description, - file_path=file_path, - line_start=line, - line_end=None, - credits=credits, - meta={}, - ) - ) +# for feedback in feedbacks: +# line = feedback.get("line", None) +# description = feedback.get("text", None) +# credits = feedback.get("credits", 0.0) +# feedback_proposals.append( +# Feedback( +# id=None, +# exercise_id=exercise.id, +# submission_id=submission.id, +# title="Feedback", +# description=description, +# file_path=file_path, +# line_start=line, +# line_end=None, +# credits=credits, +# meta={}, +# ) +# ) - return feedback_proposals \ No newline at end of file +# return feedback_proposals \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/file_instructions.py b/module_programming_llm/module_programming_llm/basic/file_instructions.py deleted file mode 100644 index 961fa4f92..000000000 --- a/module_programming_llm/module_programming_llm/basic/file_instructions.py +++ /dev/null @@ -1,59 +0,0 @@ -import json - -from langchain.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) - -from athena.programming import Exercise -from athena.logger import logger - -from module_programming_llm.helpers.utils import get_diff, get_file_extension -from module_programming_llm.helpers.models import chat - -from .prompts.generate_file_grading_instructions import system_template as 
system_template_grading_instructions, human_template as human_template_grading_instructions -from .prompts.generate_file_problem_statements import system_template as system_template_problem_statements, human_template as human_template_problem_statements - -def generate_file_grading_instructions(exercise: Exercise): - grading_instructions = exercise.grading_instructions or "" - - solution_repo = exercise.get_solution_repository() - template_repo = exercise.get_template_repository() - file_extension = get_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - - - system_message_prompt = SystemMessagePromptTemplate.from_template(system_template_grading_instructions) - human_message_prompt = HumanMessagePromptTemplate.from_template(human_template_grading_instructions) - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - - result = chat(chat_prompt.format_prompt(grading_instructions=grading_instructions, changed_files=changed_files).to_messages()) - - try: - return json.loads(result.content) - except json.JSONDecodeError: - logger.error("Could not decode JSON response from chat: %s", result.content) - return None - - -def generate_file_problem_statements(exercise: Exercise): - problem_statement = exercise.problem_statement - - solution_repo = exercise.get_solution_repository() - template_repo = exercise.get_template_repository() - file_extension = get_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - system_message_prompt = SystemMessagePromptTemplate.from_template(system_template_problem_statements) - human_message_prompt = HumanMessagePromptTemplate.from_template(human_template_problem_statements) - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - - result = chat(chat_prompt.format_prompt(problem_statement=problem_statement, changed_files=changed_files).to_messages()) - - try: - return json.loads(result.content) - except json.JSONDecodeError: - logger.error("Could not decode JSON response from chat: %s", result.content) - return None \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py deleted file mode 100644 index 9294e4da3..000000000 --- a/module_programming_llm/module_programming_llm/basic/prompts/basic_feedback_provider.py +++ /dev/null @@ -1,40 +0,0 @@ -system_template = """\ -You are a programming tutor AI at a university tasked with grading and providing feedback to programming homework assignments. - -You receive a submission with some other information and respond with the following JSON format: -[{{"text": , "credits": , "line": }}] -Extremely Important: The response should only contain the json object with the feedback, nothing else! - -Effective feedback for programming assignments should possess the following qualities: -1. Constructive: Provide guidance on how to improve the code, pointing out areas that can be optimized, refactored, or enhanced. -2. Specific: Highlight exact lines or sections of code that need attention, and suggest precise changes or improvements. -3. 
Balanced: Recognize and praise the positive aspects of the code, while also addressing areas for improvement, to encourage and motivate the student. -4. Clear and concise: Use straightforward language and avoid overly technical jargon, so that the student can easily understand the feedback. -5. Actionable: Offer practical suggestions for how the student can apply the feedback to improve their code, ensuring they have a clear path forward. -6. Educational: Explain the reasoning behind the suggestions, so the student can learn from the feedback and develop their programming skills. - -Example response: -[\ -{{"text": "Great use of the compareTo method for comparing Dates, which is the proper way to compare objects.", "credits": 3, "line": 14}},\ -{{"text": "Good job implementing the BubbleSort algorithm for sorting Dates. It shows a clear understanding of the sorting process", "credits": 5, "line": null}},\ -{{"text": "Incorrect use of \'==\' for string comparison, which leads to unexpected results. Use the \'equals\' method for string comparison instead.", "credits": -2, "line": 18}}\ -]\ -""" - -human_template = """\ -Student\'s submission to grade: -{submission_content} -Diff between solution (deletions) and student\'s submission (additions): -{solution_to_submission_diff} -Diff between template (deletions) and student\'s submission (additions): -{template_to_submission_diff} -Problem statement: -{problem_statement} -Grading instructions: -{grading_instructions} -As said, it should be effective feedback following an extremely high standard. -Critically grade the submission and distribute credits accordingly. -Be liberal with interpreting the grading instructions. - -JSON response: -""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py b/module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py deleted file mode 100644 index 8f4621bdf..000000000 --- a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_grading_instructions.py +++ /dev/null @@ -1,15 +0,0 @@ -system_template = """\ -You are a programming tutor AI at a university tasked with grading and providing feedback to programming homework assignments. - -You receive grading instructions and a list of changed files and respond in the following JSON format, associating each file with its grading instructions: -{{"": ""}} -""" - -human_template = """\ -Grading instructions: -{grading_instructions} -Changed files: -{changed_files} - -JSON response: -""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py b/module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py deleted file mode 100644 index 28886f8b9..000000000 --- a/module_programming_llm/module_programming_llm/basic/prompts/generate_file_problem_statements.py +++ /dev/null @@ -1,15 +0,0 @@ -system_template = """\ -You are a programming tutor AI at a university tasked with grading and providing feedback to programming homework assignments. 
- -You receive a overall problem statement and a list of changed files and respond in the following JSON format, associating each file with its file-specific problem statement: -{{"": ""}} -""" - -human_template = """\ -Problem statement: -{problem_statement} -Changed files: -{changed_files} - -JSON response: -""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py new file mode 100644 index 000000000..173ec1255 --- /dev/null +++ b/module_programming_llm/module_programming_llm/config.py @@ -0,0 +1,69 @@ +from pydantic import BaseModel, Field + +from athena import config_schema_provider +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig +from module_programming_llm.prompts.generate_suggestions_by_file import ( + system_template as generate_suggestions_by_file_system_template, + human_template as generate_suggestions_by_file_human_template +) +from module_programming_llm.prompts.split_grading_instructions_by_file import ( + system_template as split_grading_instructions_by_file_template, + human_template as split_grading_instructions_by_file_human_template +) +from module_programming_llm.prompts.split_problem_statement_by_file import ( + system_template as split_problem_statements_by_file_system_template, + human_template as split_problem_statements_by_file_human_template +) + + +class SplitProblemStatementsByFilePrompt(BaseModel): + """\ +Features available: **{problem_statement}**, **{changed_files}**\ + +*Note: `changed_files` are the changed files between template and solution repository.*\ +""" + system_message: str = Field(default=split_problem_statements_by_file_system_template, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=split_problem_statements_by_file_human_template, + description="Message from a human. The input on which the AI is supposed to act.") + + +class SplitGradingInstructionsByFilePrompt(BaseModel): + """\ +Features available: **{grading_instructions}**, **{changed_files}** + +*Note: `changed_files` are the changed files between template and solution repository.*\ +""" + system_message: str = Field(default=split_grading_instructions_by_file_template, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=split_grading_instructions_by_file_human_template, + description="Message from a human. The input on which the AI is supposed to act.") + + +class GenerationPrompt(BaseModel): + """\ +Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, \ +**{submission}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}** + +*Note: Prompt will be applied per file independently, submission is a single file.*\ +""" + system_message: str = Field(default=generate_suggestions_by_file_system_template, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=generate_suggestions_by_file_human_template, + description="Message from a human. 
The input on which the AI is supposed to act.") + + +class BasicApproachConfig(BaseModel): + """This approach uses a LLM with a single prompt to generate feedback in a single step.""" + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + + split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) + split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) + generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) + + +@config_schema_provider +class Configuration(BaseModel): + debug: bool = Field(default=False, description="Enable debug mode.") + approach: BasicApproachConfig = Field(default=BasicApproachConfig()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py new file mode 100644 index 000000000..bc93269eb --- /dev/null +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -0,0 +1,179 @@ +from typing import List, Optional, Sequence + +from pydantic import BaseModel, Field +from langchain.chains.openai_functions import create_structured_output_chain + +from athena import emit_meta +from athena.programming import Exercise, Submission, Feedback +from athena.logger import logger + +from module_programming_llm.config import BasicApproachConfig +from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed +from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed +from module_programming_llm.helpers.llm_utils import check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions +from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers + + +class FeedbackModel(BaseModel): + title: str = Field(..., description="Very short title, i.e. 
feedback category", example="Logic Error") + description: str = Field(..., description="Feedback description") + line_start: Optional[int] = Field(..., description="Referenced line number start, or empty if unreferenced") + line_end: Optional[int] = Field(..., description="Referenced line number end, or empty if unreferenced") + credits: float = Field(0.0, description="Number of points received/deducted") + + class Config: + title = "Feedback" + + +class AssessmentModel(BaseModel): + """Collection of feedbacks making up an assessment""" + + feedbacks: Sequence[FeedbackModel] = Field(..., description="Assessment feedbacks") + + class Config: + title = "Assessment" + + +# pylint: disable=too-many-locals +async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: + model = config.model.get_model() + + # Get split grading instructions + split_grading_instructions = generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=config, debug=debug) + file_grading_instructions = { item.file_name: item.grading_instructions for item in split_grading_instructions.instructions } + + # Get split problem statement + split_problem_statement = generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=config, debug=debug) + file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.problem_statements } + + prompt_inputs: List[dict] = [] + + # Feature extraction + solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() + submission_repo = submission.get_repository() + + file_extension = get_programming_language_file_extension(exercise.programming_language) + if file_extension is None: + raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") + + files = load_files_from_repo( + submission_repo, + file_filter=lambda x: x.endswith(file_extension) if file_extension else False + ) + + for file_path, content in files.items(): + if content is None: + continue + + problem_statement = file_problem_statements.get(file_path, "No relevant problem statement section found.") + grading_instructions = file_grading_instructions.get(file_path, "No relevant grading instructions found.") + + content = add_line_numbers(content) + solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) + + prompt_inputs.append({ + "file_path": file_path, + "submission": content, + "max_points": exercise.max_points, + "bonus_points": exercise.bonus_points, + "solution_to_submission_diff": solution_to_submission_diff, + "template_to_submission_diff": template_to_submission_diff, + "grading_instructions": grading_instructions, + "problem_statement": problem_statement, + }) + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_by_file_prompt.system_message, + human_message=config.generate_suggestions_by_file_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Filter long prompts (omitting features if necessary) + omittable_features = [ + "problem_statement", + "grading_instructions", + "template_to_submission_diff", + 
"solution_to_submission_diff" + ] + prompt_inputs = [ + omitted_prompt_input for omitted_prompt_input, should_run in + [check_prompt_length_and_omit_features_if_necessary( + prompt=chat_prompt, + prompt_input=prompt_input, + max_input_tokens=config.max_input_tokens, + omittable_features=omittable_features, + debug=debug + ) for prompt_input in prompt_inputs] + if should_run + ] + + chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) + if not prompt_inputs: + return [] + result = await chain.agenerate(prompt_inputs) + + logger.info("Generated result: %s ", result) + + return [] + # return predict_and_parse( + # model=model, + # chat_prompt=chat_prompt, + # prompt_input={ + # "grading_instructions": exercise.grading_instructions, + # "changed_files": changed_files + # }, + # pydantic_object=SplitGradingInstructions + # ) + + + + + +# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: + +# # Filter long prompts +# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] + +# # Completion +# chain = LLMChain(llm=chat, prompt=chat_prompt) +# if not input_list: +# return [] +# result = await chain.agenerate(input_list) + +# # Parse result +# feedback_proposals: List[Feedback] = [] +# for input, generations in zip(input_list, result.generations): +# file_path = input["file_path"] +# for generation in generations: +# try: +# feedbacks = json.loads(generation.text) +# except json.JSONDecodeError: +# logger.error("Failed to parse feedback json: %s", generation.text) +# continue +# if not isinstance(feedbacks, list): +# logger.error("Feedback json is not a list: %s", generation.text) +# continue + +# for feedback in feedbacks: +# line = feedback.get("line", None) +# description = feedback.get("text", None) +# credits = feedback.get("credits", 0.0) +# feedback_proposals.append( +# Feedback( +# id=None, +# exercise_id=exercise.id, +# submission_id=submission.id, +# title="Feedback", +# description=description, +# file_path=file_path, +# line_start=line, +# line_end=None, +# credits=credits, +# meta={}, +# ) +# ) + +# return feedback_proposals \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py new file mode 100644 index 000000000..53a300f00 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -0,0 +1,152 @@ +from typing import Type, TypeVar, List +from pydantic import BaseModel + +import tiktoken + +from langchain.chains import LLMChain +from langchain.chat_models import ChatOpenAI +from langchain.base_language import BaseLanguageModel +from langchain.prompts import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) +from langchain.output_parsers import PydanticOutputParser, OutputFixingParser +from langchain.chains.openai_functions import create_structured_output_chain + +from athena import emit_meta + +T = TypeVar("T", bound=BaseModel) + + +def num_tokens_from_string(string: str) -> int: + """Returns the number of tokens in a text string.""" + encoding = tiktoken.get_encoding("cl100k_base") + num_tokens = len(encoding.encode(string)) + return num_tokens + + +def num_tokens_from_prompt(chat_prompt: ChatPromptTemplate, prompt_input: dict) -> int: + """Returns the number of tokens in a chat prompt.""" + return 
num_tokens_from_string(chat_prompt.format(**prompt_input)) + + +def check_prompt_length_and_omit_features_if_necessary(prompt: ChatPromptTemplate, + prompt_input: dict, + max_input_tokens: int, + omittable_features: List[str], + debug: bool): + """Check if the input is too long and omit features if necessary. + + Note: Omitted features will be replaced with "omitted" in the prompt + + Args: + prompt (ChatPromptTemplate): Prompt template + prompt_input (dict): Prompt input + max_input_tokens (int): Maximum number of tokens allowed + omittable_features (List[str]): List of features that can be omitted, ordered by priority (least important first) + debug (bool): Debug flag + + Returns: + (dict, bool): Tuple of (prompt_input, should_run) where prompt_input is the input with omitted features and + should_run is True if the model should run, False otherwise + """ + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + return prompt_input, True + + omitted_features = [] + + # Omit features until the input is short enough + for feature in omittable_features: + if feature in prompt_input: + omitted_features.append(feature) + prompt_input[feature] = "omitted" + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + if debug: + emit_meta("omitted_features", omitted_features) + return prompt_input, True + + # If we get here, we couldn't omit enough features + return prompt_input, False + + +def supports_function_calling(model: BaseLanguageModel): + """Returns True if the model supports function calling, False otherwise + + Args: + model (BaseLanguageModel): The model to check + + Returns: + boolean: True if the model supports function calling, False otherwise + """ + return isinstance(model, ChatOpenAI) + + +def get_chat_prompt_with_formatting_instructions( + model: BaseLanguageModel, + system_message: str, + human_message: str, + pydantic_object: Type[T] + ) -> ChatPromptTemplate: + """Returns a ChatPromptTemplate with formatting instructions (if necessary) + + Note: Does nothing if the model supports function calling + + Args: + model (BaseLanguageModel): The model to check if it supports function calling + system_message (str): System message + human_message (str): Human message + pydantic_object (Type[T]): Model to parse the output + + Returns: + ChatPromptTemplate: ChatPromptTemplate with formatting instructions (if necessary) + """ + if supports_function_calling(model): + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message) + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message) + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") + system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} + system_message_prompt.prompt.input_variables.remove("format_instructions") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON Response:") + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + +def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): + """Predicts and parses the output of the model + + Args: + model (BaseLanguageModel): The model to predict with + chat_prompt (ChatPromptTemplate): Prompt 
to use + prompt_input (dict): Input parameters to use for the prompt + pydantic_object (Type[T]): Pydantic model to parse the output + """ + if supports_function_calling(model): + chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) + return chain.run(**prompt_input) + + output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) + chain = LLMChain(llm=model, prompt=chat_prompt) + output = chain.run(**prompt_input) + return output_parser.parse(output) + + +async def agenerate_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): + """Generates and parses the output of the model + + Args: + model (BaseLanguageModel): The model to generate with + chat_prompt (ChatPromptTemplate): Prompt to use + prompt_input (dict): Input parameters to use for the prompt + pydantic_object (Type[T]): Pydantic model to parse the output + """ + if supports_function_calling(model): + chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) + return chain.run(**prompt_input) + + output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) + chain = LLMChain(llm=model, prompt=chat_prompt) + output = chain.run(**prompt_input) + return output_parser.parse(output) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/models.py b/module_programming_llm/module_programming_llm/helpers/models.py deleted file mode 100644 index 70826a454..000000000 --- a/module_programming_llm/module_programming_llm/helpers/models.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from langchain.chat_models import AzureChatOpenAI, ChatOpenAI -import openai - -OPENAI_API_TYPE = os.environ.get("OPENAI_API_TYPE") - -# Validate environment variables -if "OPENAI_API_KEY" not in os.environ: - raise EnvironmentError("OPENAI_API_KEY environment variable not set.") -if OPENAI_API_TYPE == "azure": - if "OPENAI_API_BASE" not in os.environ: - raise EnvironmentError("OPENAI_API_TYPE=azure but OPENAI_API_BASE environment variable not set.") - if "OPENAI_API_VERSION" not in os.environ: - raise EnvironmentError("OPENAI_API_TYPE=azure but OPENAI_API_VERSION environment variable not set.") - if "AZURE_DEPLOYMENT_NAME" not in os.environ: - raise EnvironmentError("OPENAI_API_TYPE=azure but AZURE_DEPLOYMENT_NAME environment variable not set.") - - AZURE_DEPLOYMENT_NAME = os.environ["AZURE_DEPLOYMENT_NAME"] - - # Check if deployment exists - openai.api_type = "azure" - openai.api_key = os.environ["OPENAI_API_KEY"] - openai.api_base = os.environ["OPENAI_API_BASE"] - openai.api_version = os.environ["OPENAI_API_VERSION"] - - deployments = openai.Deployment.list().data # type: ignore - deployment_ids = [deployment.id for deployment in deployments] - if AZURE_DEPLOYMENT_NAME not in deployment_ids: - deployments = [{ "id": deployment.id, "model": deployment.model } for deployment in deployments] - raise EnvironmentError(f"Deployment id '{AZURE_DEPLOYMENT_NAME}' not found, available deployments: {deployments}") - - # Initialize azure chat model - chat = AzureChatOpenAI(deployment_name=AZURE_DEPLOYMENT_NAME, client="", temperature=0) -else: - # Initialize openai chat model - chat = ChatOpenAI(client="", temperature=0) diff --git a/module_programming_llm/module_programming_llm/helpers/models/__init__.py b/module_programming_llm/module_programming_llm/helpers/models/__init__.py new file mode 100644 
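For orientation, the helpers in llm_utils.py are meant to be combined by callers such as generate_suggestions_by_file: build the chat prompt (formatting instructions are only appended when the model cannot use OpenAI function calling), trim over-long inputs by omitting low-priority features, then predict and parse into a pydantic model. A minimal sketch of that flow, assuming a hypothetical ExampleOutput schema and an already-configured model obtained from a ModelConfig:

from typing import Sequence
from pydantic import BaseModel, Field
from module_programming_llm.helpers.llm_utils import (
    check_prompt_length_and_omit_features_if_necessary,
    get_chat_prompt_with_formatting_instructions,
    predict_and_parse,
)

class ExampleOutput(BaseModel):
    # Hypothetical output schema, used only for this sketch
    items: Sequence[str] = Field(..., description="Example items")

def run_sketch(model, system_message: str, human_message: str, prompt_input: dict):
    # Prompt with formatting instructions appended only for non-function-calling models
    chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=system_message,
        human_message=human_message,
        pydantic_object=ExampleOutput,
    )
    # Omit the least important features first if the prompt exceeds the token budget
    prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
        prompt=chat_prompt,
        prompt_input=prompt_input,
        max_input_tokens=3000,
        omittable_features=["problem_statement"],
        debug=False,
    )
    if not should_run:
        return None
    # Run the model and parse the result into ExampleOutput
    return predict_and_parse(model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, pydantic_object=ExampleOutput)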
index 000000000..f5ab68a2f --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/__init__.py @@ -0,0 +1,36 @@ +import os +from typing import Type, Union, List +from module_programming_llm.helpers.models.model_config import ModelConfig + + +DefaultModelConfig: Type[ModelConfig] +default_model_name = os.environ.get("LLM_DEFAULT_MODEL") + +types: List[Type[ModelConfig]] = [] +try: + import module_programming_llm.helpers.models.openai as openai_config + types.append(openai_config.OpenAIModelConfig) + if default_model_name in openai_config.available_models: + DefaultModelConfig = openai_config.OpenAIModelConfig +except AttributeError: + pass + +try: + import module_programming_llm.helpers.models.replicate as replicate_config + types.append(replicate_config.ReplicateModelConfig) + if default_model_name in replicate_config.available_models: + DefaultModelConfig = replicate_config.ReplicateModelConfig +except AttributeError: + pass + +if not types: + raise EnvironmentError( + "No model configurations available, please set up at least one provider in the environment variables.") + +if 'DefaultModelConfig' not in globals(): + DefaultModelConfig = types[0] + +if len(types) == 1: + ModelConfigType = types[0] +else: + ModelConfigType = Union[tuple(types)] # type: ignore diff --git a/module_programming_llm/module_programming_llm/helpers/models/model_config.py b/module_programming_llm/module_programming_llm/helpers/models/model_config.py new file mode 100644 index 000000000..f433ab587 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/model_config.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod +from pydantic import BaseModel +from langchain.base_language import BaseLanguageModel + + +class ModelConfig(BaseModel, ABC): + + @abstractmethod + def get_model(self) -> BaseLanguageModel: + pass diff --git a/module_programming_llm/module_programming_llm/helpers/models/openai.py b/module_programming_llm/module_programming_llm/helpers/models/openai.py new file mode 100644 index 000000000..90cd24795 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/openai.py @@ -0,0 +1,321 @@ +import os +from contextlib import contextmanager +from typing import Any, Callable, Dict, List +from pydantic import Field, validator, PositiveInt +from enum import Enum + +import openai +from langchain.chat_models import AzureChatOpenAI, ChatOpenAI +from langchain.llms import AzureOpenAI, OpenAI +from langchain.llms.openai import BaseOpenAI +from langchain.base_language import BaseLanguageModel + +from athena.logger import logger +from .model_config import ModelConfig + + +OPENAI_PREFIX = "openai_" +AZURE_OPENAI_PREFIX = "azure_openai_" + + +######################################################################### +# Monkey patching openai/langchain api # +# ===================================================================== # +# This allows us to have multiple api keys i.e. mixing # +# openai and azure openai api keys so we can use not only deployed # +# models but also models from the non-azure openai api. # +# This is mostly for testing purposes, in production we can just deploy # +# the models to azure that we want to use. 
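As an aside, the ModelConfig base class defined above only requires implementing get_model(); a provider-specific configuration can therefore be as small as the following illustrative sketch (it uses LangChain's FakeListLLM purely as a stand-in and is not part of this module):

from langchain.base_language import BaseLanguageModel
from langchain.llms.fake import FakeListLLM
from module_programming_llm.helpers.models.model_config import ModelConfig

class CannedModelConfig(ModelConfig):
    # Illustrative only: always answers with a fixed response
    response: str = "ok"

    def get_model(self) -> BaseLanguageModel:
        return FakeListLLM(responses=[self.response])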
# +######################################################################### + +def _wrap(old: Any, new: Any) -> Callable: + def repl(*args: Any, **kwargs: Any) -> Any: + new(args[0]) # args[0] is self + return old(*args, **kwargs) + return repl + + +def _async_wrap(old: Any, new: Any): + async def repl(*args, **kwargs): + new(args[0]) # args[0] is self + return await old(*args, **kwargs) + return repl + + +def _set_credentials(self): + openai.api_key = self.openai_api_key + + api_type = "open_ai" + api_base = "https://api.openai.com/v1" + api_version = None + if hasattr(self, "openai_api_type"): + api_type = self.openai_api_type + + if api_type == "azure": + if hasattr(self, "openai_api_base"): + api_base = self.openai_api_base + if hasattr(self, "openai_api_version"): + api_version = self.openai_api_version + + openai.api_type = api_type + openai.api_base = api_base + openai.api_version = api_version + + +# Monkey patching langchain +# pylint: disable=protected-access +ChatOpenAI._generate = _wrap(ChatOpenAI._generate, _set_credentials) # type: ignore +ChatOpenAI._agenerate = _async_wrap(ChatOpenAI._agenerate, _set_credentials) # type: ignore +BaseOpenAI._generate = _wrap(BaseOpenAI._generate, _set_credentials) # type: ignore +BaseOpenAI._agenerate = _async_wrap(BaseOpenAI._agenerate, _set_credentials) # type: ignore +# pylint: enable=protected-access + +######################################################################### +# Monkey patching end # +######################################################################### + + +def _use_azure_credentials(): + openai.api_type = "azure" + openai.api_key = os.environ.get("LLM_AZURE_OPENAI_API_KEY") + openai.api_base = os.environ.get("LLM_AZURE_OPENAI_API_BASE") + # os.environ.get("LLM_AZURE_OPENAI_API_VERSION") + openai.api_version = "2023-03-15-preview" + + +def _use_openai_credentials(): + openai.api_type = "open_ai" + openai.api_key = os.environ.get("LLM_OPENAI_API_KEY") + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + + +openai_available = len(os.environ.get("LLM_OPENAI_API_KEY") or "") > 0 +azure_openai_available = len(os.environ.get("LLM_AZURE_OPENAI_API_KEY") or "") > 0 + + +# This is a hack to make sure that the openai api is set correctly +# Right now it is overkill, but it will be useful when the api gets fixed and we no longer +# hardcode the model names (i.e. 
OpenAI fixes their api) +@contextmanager +def _openai_client(use_azure_api: bool, is_preference: bool): + """Set the openai client to use the correct api type, if available + + Args: + use_azure_api (bool): If true, use the azure api, else use the openai api + is_preference (bool): If true, it can fall back to the other api if the preferred one is not available + """ + if use_azure_api: + if azure_openai_available: + _use_azure_credentials() + elif is_preference and openai_available: + _use_openai_credentials() + elif is_preference: + raise EnvironmentError( + "No OpenAI api available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and " + "LLM_AZURE_OPENAI_API_VERSION environment variables or LLM_OPENAI_API_KEY environment variable" + ) + else: + raise EnvironmentError( + "Azure OpenAI api not available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and " + "LLM_AZURE_OPENAI_API_VERSION environment variables" + ) + else: + if openai_available: + _use_openai_credentials() + elif is_preference and azure_openai_available: + _use_azure_credentials() + elif is_preference: + raise EnvironmentError( + "No OpenAI api available, please set LLM_OPENAI_API_KEY environment variable or LLM_AZURE_OPENAI_API_KEY, " + "LLM_AZURE_OPENAI_API_BASE and LLM_AZURE_OPENAI_API_VERSION environment variables" + ) + else: + raise EnvironmentError( + "OpenAI api not available, please set LLM_OPENAI_API_KEY environment variable" + ) + + # API client is setup correctly + yield + + +def _get_available_deployments(openai_models: Dict[str, List[str]], model_aliases: Dict[str, str]): + available_deployments: Dict[str, Dict[str, Any]] = { + "chat_completion": {}, + "completion": {}, + "fine_tuneing": {}, + } + + if azure_openai_available: + with _openai_client(use_azure_api=True, is_preference=False): + deployments = openai.Deployment.list().get("data") or [] # type: ignore + for deployment in deployments: + model_name = deployment.model + if model_name in model_aliases: + model_name = model_aliases[model_name] + if model_name in openai_models["chat_completion"]: + available_deployments["chat_completion"][deployment.id] = deployment + elif model_name in openai_models["completion"]: + available_deployments["completion"][deployment.id] = deployment + elif model_name in openai_models["fine_tuneing"]: + available_deployments["fine_tuneing"][deployment.id] = deployment + + return available_deployments + + +def _get_available_models(openai_models: Dict[str, List[str]], + available_deployments: Dict[str, Dict[str, Any]]): + available_models: Dict[str, BaseLanguageModel] = {} + + if openai_available: + openai_api_key = os.environ["LLM_OPENAI_API_KEY"] + for model_name in openai_models["chat_completion"]: + available_models[OPENAI_PREFIX + model_name] = ChatOpenAI( + model=model_name, openai_api_key=openai_api_key, client="") + for model_name in openai_models["completion"]: + available_models[OPENAI_PREFIX + model_name] = OpenAI( + model=model_name, openai_api_key=openai_api_key, client="") + + if azure_openai_available: + azure_openai_api_key = os.environ["LLM_AZURE_OPENAI_API_KEY"] + azure_openai_api_base = os.environ["LLM_AZURE_OPENAI_API_BASE"] + azure_openai_api_version = os.environ["LLM_AZURE_OPENAI_API_VERSION"] + + for model_type, Model in [("chat_completion", AzureChatOpenAI), ("completion", AzureOpenAI)]: + for deployment_name, deployment in available_deployments[model_type].items(): + available_models[AZURE_OPENAI_PREFIX + deployment_name] = Model( + model=deployment.model, + 
deployment_name=deployment_name, + openai_api_base=azure_openai_api_base, + openai_api_version=azure_openai_api_version, + openai_api_key=azure_openai_api_key, + client="", + ) + + return available_models + + +_model_aliases = { + "gpt-35-turbo": "gpt-3.5-turbo", +} + +# Hardcoded because openai can't provide a trustworthly api to get the list of models and capabilities... +openai_models = { + "chat_completion": [ + "gpt-4", + # "gpt-4-32k", # Not publicly available + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k" + ], + "completion": [ + "text-davinci-003", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + "fine_tuneing": [ + "davinci", + "curie", + "babbage", + "ada", + ] +} +available_deployments = _get_available_deployments(openai_models, _model_aliases) +available_models = _get_available_models(openai_models, available_deployments) + +if available_models: + logger.info("Available openai models: %s", ", ".join(available_models.keys())) + + OpenAIModel = Enum('OpenAIModel', {name: name for name in available_models}) # type: ignore + + + default_model_name = "gpt-3.5-turbo" + if "LLM_DEFAULT_MODEL" in os.environ and os.environ["LLM_DEFAULT_MODEL"] in available_models: + default_model_name = os.environ["LLM_DEFAULT_MODEL"] + if default_model_name not in available_models: + default_model_name = list(available_models.keys())[0] + + default_openai_model = OpenAIModel[default_model_name] + + + # Long descriptions will be displayed in the playground UI and are copied from the OpenAI docs + class OpenAIModelConfig(ModelConfig): + """OpenAI LLM configuration.""" + + model_name: OpenAIModel = Field(default=default_openai_model, # type: ignore + description="The name of the model to use.") + max_tokens: PositiveInt = Field(1000, description="""\ +The maximum number of [tokens](https://platform.openai.com/tokenizer) to generate in the chat completion. + +The total length of input tokens and generated tokens is limited by the model's context length. \ +[Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) for counting tokens.\ +""") + + temperature: float = Field(default=0.0, ge=0, le=2, description="""\ +What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, \ +while lower values like 0.2 will make it more focused and deterministic. + +We generally recommend altering this or `top_p` but not both.\ +""") + + top_p: float = Field(default=1, ge=0, le=1, description="""\ +An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. \ +So 0.1 means only the tokens comprising the top 10% probability mass are considered. + +We generally recommend altering this or `temperature` but not both.\ +""") + + presence_penalty: float = Field(default=0, ge=-2, le=2, description="""\ +Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, \ +increasing the model's likelihood to talk about new topics. + +[See more information about frequency and presence penalties.](https://platform.openai.com/docs/api-reference/parameter-details)\ +""") + + frequency_penalty: float = Field(default=0, ge=-2, le=2, description="""\ +Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, \ +decreasing the model's likelihood to repeat the same line verbatim. 
+ +[See more information about frequency and presence penalties.](https://platform.openai.com/docs/api-reference/parameter-details)\ +""") + + @validator('max_tokens') + def max_tokens_must_be_positive(cls, v): + """ + Validate that max_tokens is a positive integer. + """ + if v <= 0: + raise ValueError('max_tokens must be a positive integer') + return v + + def get_model(self) -> BaseLanguageModel: + """Get the model from the configuration. + + Returns: + BaseLanguageModel: The model. + """ + model = available_models[self.model_name.value] + kwargs = model._lc_kwargs + secrets = {secret: getattr(model, secret) for secret in model.lc_secrets.keys()} + kwargs.update(secrets) + + model_kwargs = kwargs.get("model_kwargs", {}) + for attr, value in self.dict().items(): + if attr == "model_name": + # Skip model_name + continue + if hasattr(model, attr): + # If the model has the attribute, add it to kwargs + kwargs[attr] = value + else: + # Otherwise, add it to model_kwargs (necessary for chat models) + model_kwargs[attr] = value + kwargs["model_kwargs"] = model_kwargs + + # Initialize a copy of the model using the config + model = model.__class__(**kwargs) + return model + + + class Config: + title = 'OpenAI' \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/models/replicate.py b/module_programming_llm/module_programming_llm/helpers/models/replicate.py new file mode 100644 index 000000000..a706b8247 --- /dev/null +++ b/module_programming_llm/module_programming_llm/helpers/models/replicate.py @@ -0,0 +1,110 @@ +import os +from pydantic import Field, PositiveInt +from enum import Enum + +from langchain.llms import Replicate +from langchain.base_language import BaseLanguageModel + +from athena.logger import logger +from .model_config import ModelConfig + + +# Hardcoded list of models +replicate_models = { + # LLAMA 2 70B Chat + # https://replicate.com/replicate/llama-2-70b-chat + "llama-2-70b-chat": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", + # LLaMA 2 13B Chat + # https://replicate.com/a16z-infra/llama-2-13b-chat + "llama-2-13b-chat": "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52", + # LLaMA 2 7B Chat + # https://replicate.com/a16z-infra/llama-2-7b-chat + "llama-2-7b-chat": "a16z-infra/llama-2-7b-chat:7b0bfc9aff140d5b75bacbed23e91fd3c34b01a1e958d32132de6e0a19796e2c", +} + +available_models = {} +if len(os.environ.get("REPLICATE_API_TOKEN") or "") > 0: + available_models = { + name: Replicate( + model=model, + ) + for name, model in replicate_models.items() + } +else: + logger.warning("REPLICATE_API_TOKEN not found in environment variables. 
Replicate models are disabled.") + +if available_models: + logger.info("Available replicate models: %s", + ", ".join(available_models.keys())) + + ReplicateModel = Enum('ReplicateModel', {name: name for name in available_models}) # type: ignore + + + default_model_name = "llama-2-13b-chat" + if "LLM_DEFAULT_MODEL" in os.environ and os.environ["LLM_DEFAULT_MODEL"] in available_models: + default_model_name = os.environ["LLM_DEFAULT_MODEL"] + if default_model_name not in available_models: + default_model_name = list(available_models.keys())[0] + + default_replicate_model = ReplicateModel[default_model_name] + + + # Note: Config has been setup with LLaMA 2 chat models in mind, other models may not work as expected + class ReplicateModelConfig(ModelConfig): + """Replicate LLM configuration.""" + + model_name: ReplicateModel = Field(default=default_replicate_model, # type: ignore + description="The name of the model to use.") + max_new_tokens: PositiveInt = Field(1000, description="""\ +Maximum number of tokens to generate. A word is generally 2-3 tokens (minimum: 1)\ +""") + min_new_tokens: int = Field(-1, description="""\ +Minimum number of tokens to generate. To disable, set to -1. A word is generally 2-3 tokens. (minimum: -1)\ +""") + temperature: float = Field(default=0.01, ge=0.01, le=5, description="""\ +Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value.\ +(minimum: 0.01; maximum: 5)\ +""") + top_p: float = Field(default=1, ge=0, le=1, description="""\ +When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens (maximum: 1)\ +""") + top_k: PositiveInt = Field(default=250, description="""\ +When decoding text, samples from the top k most likely tokens; lower to ignore less likely tokens\ +""") + repetition_penalty: float = Field(default=1, ge=0.01, le=5, description="""\ +Penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, \ +less than 1 encourage it. (minimum: 0.01; maximum: 5)\ +""") + repetition_penalty_sustain: int = Field(default=-1, description=""" +Number of most recent tokens to apply repetition penalty to, -1 to apply to whole context (minimum: -1)\ +""") + token_repetition_penalty_decay: PositiveInt = Field(default=128, description="""\ +Gradually decrease penalty over this many tokens (minimum: 1)\ +""") + + def get_model(self) -> BaseLanguageModel: + """Get the model from the configuration. + + Returns: + BaseLanguageModel: The model. 
+ """ + model = available_models[self.model_name.value] + kwargs = model._lc_kwargs + + input_kwargs = {} + for attr, value in self.dict().items(): + # Skip model_name + if attr == "model_name": + continue + input_kwargs[attr] = value + + # Set model parameters + kwargs["input"] = input_kwargs + + # Initialize a copy of the model using the config + model = model.__class__(**kwargs) + return model + + + class Config: + title = 'Replicate' diff --git a/module_programming_llm/module_programming_llm/helpers/utils.py b/module_programming_llm/module_programming_llm/helpers/utils.py index a85cfa997..8f6c67ab8 100644 --- a/module_programming_llm/module_programming_llm/helpers/utils.py +++ b/module_programming_llm/module_programming_llm/helpers/utils.py @@ -34,7 +34,7 @@ def add_line_numbers(content: str) -> str: ) -def get_file_extension(programming_language: str) -> str | None: +def get_programming_language_file_extension(programming_language: str) -> str | None: # JAVA, C, OCAML, HASKELL, PYTHON, SWIFT, VHDL, ASSEMBLER, EMPTY, KOTLIN file_extensions = { "JAVA": ".java", diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py new file mode 100644 index 000000000..f1a300ae0 --- /dev/null +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -0,0 +1,27 @@ +system_template = """\ +You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. + +VERY IMPORTANT: Effective feedback for text assignments should be: +1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual\ +""" + +human_template = """\ +Problem statement: +{problem_statement} + +Example solution: +{example_solution} + +Grading instructions: +{grading_instructions} +Max points: {max_points}, bonus points: {bonus_points} + +Student\'s submission to grade (with line numbers : ): +{submission} + +Diff between solution (deletions) and student\'s submission (additions): +{solution_to_submission_diff} + +Diff between template (deletions) and student\'s submission (additions): +{template_to_submission_diff} +""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py new file mode 100644 index 000000000..7c0cc685d --- /dev/null +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -0,0 +1,13 @@ +system_template = """\ +You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. + +Restructure the grading instructions by changed file. +""" + +human_template = """\ +Grading instructions: +{grading_instructions} + +Changed files: +{changed_files} +""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py new file mode 100644 index 000000000..a1ee99f9f --- /dev/null +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -0,0 +1,13 @@ +system_template = """\ +You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. 
+ +Restructure the problem statement by changed file. +""" + +human_template = """\ +Problem statement: +{problem_statement} + +Changed files: +{changed_files} +""" \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py new file mode 100644 index 000000000..fefa93e14 --- /dev/null +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -0,0 +1,95 @@ +from typing import Optional, Sequence +from athena import emit_meta + +from pydantic import BaseModel, Field + +from athena.programming import Exercise +from athena.storage import store_exercise + +from module_programming_llm.config import BasicApproachConfig +from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse +from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension + + +FILE_GRADING_INSTRUCTIONS_KEY = "file_grading_instructions" + + +class FileGradingInstruction(BaseModel): + file_name: str = Field(..., description="File name") + grading_instructions: str = Field(..., description="Grading instructions relevant for this file") + + +class SplitGradingInstructions(BaseModel): + """Collection of grading instructions split by file""" + instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + + +def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: + """Split the general grading instructions by file + + Args: + exercise (Exercise): Exercise to split the grading instructions for + config (BasicApproachConfig): Configuration + + Returns: + SplitGradingInstructions: Grading instructions split by file, empty if input was too long + """ + if exercise.grading_instructions is None or exercise.grading_instructions.strip() == "": + return SplitGradingInstructions(instructions=[]) + + model = config.model.get_model() + + solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() + file_extension = get_programming_language_file_extension(exercise.programming_language) or "" + changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.split_grading_instructions_by_file_prompt.system_message, + human_message=config.split_grading_instructions_by_file_prompt.human_message, + pydantic_object=SplitGradingInstructions + ) + + prompt_input = { + "grading_instructions": exercise.grading_instructions, + "changed_files": changed_files + } + + # If the input is too long, return an empty SplitGradingInstructions object + prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) + if prompt_length > config.max_input_tokens: + if debug: + emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") + return SplitGradingInstructions(instructions=[]) + + split_grading_instructions = predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=SplitGradingInstructions + ) + + if debug: + emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_data", split_grading_instructions.dict()) + + return split_grading_instructions + + +def 
generate_and_store_split_grading_instructions_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: + """Generate and store the split grading instructions if needed + + Args: + exercise (Exercise): Exercise to get the split grading instructions for + config (BasicApproachConfig): Configuration + + Returns: + SplitGradingInstructions: Grading instructions split by file + """ + if FILE_GRADING_INSTRUCTIONS_KEY in exercise.meta: + return SplitGradingInstructions.parse_obj(exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY]) + + split_grading_instructions = split_grading_instructions_by_file(exercise=exercise, config=config, debug=debug) + exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY] = split_grading_instructions.dict() + store_exercise(exercise) + return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py new file mode 100644 index 000000000..8329e5749 --- /dev/null +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -0,0 +1,95 @@ +from typing import Sequence +from athena import emit_meta + +from pydantic import BaseModel, Field + +from athena.programming import Exercise +from athena.storage import store_exercise + +from module_programming_llm.config import BasicApproachConfig +from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse +from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension + + +FILE_PROBLEM_STATEMETS_KEY = "file_problem_statements" + + +class FileProblemStatement(BaseModel): + file_name: str = Field(..., description="File name") + problem_statement: str = Field(..., description="Problem statement relevant for this file") + + +class SplitProblemStatement(BaseModel): + """Collection of problem statements split by file""" + problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + + +def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: + """Split the general problem statement by file + + Args: + exercise (Exercise): Exercise to split the problem statement for + config (BasicApproachConfig): Configuration + + Returns: + SplitProblemStatement: Problem statement split by file, empty if input was too long + """ + if exercise.problem_statement.strip() == "": + return SplitProblemStatement(problem_statements=[]) + + model = config.model.get_model() + + solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() + file_extension = get_programming_language_file_extension(exercise.programming_language) or "" + changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.split_problem_statement_by_file_prompt.system_message, + human_message=config.split_problem_statement_by_file_prompt.human_message, + pydantic_object=SplitProblemStatement + ) + + prompt_input = { + "problem_statement": exercise.problem_statement, + "changed_files": changed_files + } + + # If the input is too long, return an empty SplitProblemStatement object + prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) + if prompt_length > 
config.max_input_tokens: + if debug: + emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") + return SplitProblemStatement(problem_statements=[]) + + split_problem_statement = predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=SplitProblemStatement + ) + + if debug: + emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_data", split_problem_statement.dict()) + + return split_problem_statement + + +def generate_and_store_split_problem_statement_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: + """Generate and store the split problem statement if needed + + Args: + exercise (Exercise): Exercise to split the problem statement for + config (BasicApproachConfig): Configuration + + Returns: + SplitProblemStatement: Problem statement split by file + """ + if FILE_PROBLEM_STATEMETS_KEY in exercise.meta: + return SplitProblemStatement.parse_obj(exercise.meta[FILE_PROBLEM_STATEMETS_KEY]) + + split_problem_statement = split_problem_statement_by_file(exercise=exercise, config=config, debug=debug) + exercise.meta[FILE_PROBLEM_STATEMETS_KEY] = split_problem_statement.dict() + store_exercise(exercise) + return split_problem_statement diff --git a/module_programming_llm/poetry.lock b/module_programming_llm/poetry.lock index c0f838c48..008a22564 100644 --- a/module_programming_llm/poetry.lock +++ b/module_programming_llm/poetry.lock @@ -146,6 +146,26 @@ doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd- test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] trio = ["trio (<0.22)"] +[[package]] +name = "astroid" +version = "2.15.6" +description = "An abstract syntax tree for Python with inference support." 
+category = "dev" +optional = false +python-versions = ">=3.7.2" +files = [ + {file = "astroid-2.15.6-py3-none-any.whl", hash = "sha256:389656ca57b6108f939cf5d2f9a2a825a3be50ba9d589670f393236e0a03b91c"}, + {file = "astroid-2.15.6.tar.gz", hash = "sha256:903f024859b7c7687d7a7f3a3f73b17301f8e42dfd9cc9df9d4418172d3e2dbd"}, +] + +[package.dependencies] +lazy-object-proxy = ">=1.4.0" +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} +wrapt = [ + {version = ">=1.11,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, +] + [[package]] name = "async-timeout" version = "4.0.3" @@ -343,6 +363,33 @@ typing-inspect = ">=0.4.0" [package.extras] dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (>=7.2.0)", "setuptools", "simplejson", "twine", "types-dataclasses", "wheel"] +[[package]] +name = "dill" +version = "0.3.7" +description = "serialize all of Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, + {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "dodgy" +version = "0.2.1" +description = "Dodgy: Searches for dodgy looking lines in Python code" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "dodgy-0.2.1-py3-none-any.whl", hash = "sha256:51f54c0fd886fa3854387f354b19f429d38c04f984f38bc572558b703c0542a6"}, + {file = "dodgy-0.2.1.tar.gz", hash = "sha256:28323cbfc9352139fdd3d316fa17f325cc0e9ac74438cbba51d70f9b48f86c3a"}, +] + [[package]] name = "exceptiongroup" version = "1.1.3" @@ -380,6 +427,38 @@ dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (> doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"] test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"] +[[package]] +name = "flake8" +version = "2.3.0" +description = "the modular source code checker: pep8, pyflakes and co" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "flake8-2.3.0-py2.py3-none-any.whl", hash = "sha256:c99cc9716d6655d9c8bcb1e77632b8615bf0abd282d7abd9f5c2148cad7fc669"}, + {file = "flake8-2.3.0.tar.gz", hash = "sha256:5ee1a43ccd0716d6061521eec6937c983efa027793013e572712c4da55c7c83e"}, +] + +[package.dependencies] +mccabe = ">=0.2.1" +pep8 = ">=1.5.7" +pyflakes = ">=0.8.1" + +[[package]] +name = "flake8-polyfill" +version = "1.0.2" +description = "Polyfill package for Flake8 plugins" +category = "dev" +optional = false +python-versions = "*" 
+files = [ + {file = "flake8-polyfill-1.0.2.tar.gz", hash = "sha256:e44b087597f6da52ec6393a709e7108b2905317d0c0b744cdca6208e670d8eda"}, + {file = "flake8_polyfill-1.0.2-py2.py3-none-any.whl", hash = "sha256:12be6a34ee3ab795b19ca73505e7b55826d5f6ad7230d31b18e106400169b9e9"}, +] + +[package.dependencies] +flake8 = "*" + [[package]] name = "frozenlist" version = "1.4.0" @@ -625,6 +704,24 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "isort" +version = "5.12.0" +description = "A Python utility / library to sort Python imports." +category = "dev" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] + +[package.extras] +colors = ["colorama (>=0.4.3)"] +pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements-deprecated-finder = ["pip-api", "pipreqs"] + [[package]] name = "joblib" version = "1.3.2" @@ -693,6 +790,52 @@ files = [ pydantic = ">=1,<3" requests = ">=2,<3" +[[package]] +name = "lazy-object-proxy" +version = "1.9.0" +description = "A fast and thorough lazy object proxy." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash 
= "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"}, + {file = 
"lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, +] + [[package]] name = "marshmallow" version = "3.20.1" @@ -729,6 +872,18 @@ files = [ [package.dependencies] marshmallow = ">=2.0.0" +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + [[package]] name = "multidict" version = "6.0.4" @@ -1026,6 +1181,89 @@ files = [ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] +[[package]] +name = "pep8" +version = "1.7.1" +description = "Python style guide checker" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pep8-1.7.1-py2.py3-none-any.whl", hash = "sha256:b22cfae5db09833bb9bd7c8463b53e1a9c9b39f12e304a8d0bba729c501827ee"}, + {file = "pep8-1.7.1.tar.gz", hash = "sha256:fe249b52e20498e59e0b5c5256aa52ee99fc295b26ec9eaa85776ffdb9fe6374"}, +] + +[[package]] +name = "pep8-naming" +version = "0.10.0" +description = "Check PEP-8 naming conventions, plugin for flake8" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pep8-naming-0.10.0.tar.gz", hash = "sha256:f3b4a5f9dd72b991bf7d8e2a341d2e1aa3a884a769b5aaac4f56825c1763bf3a"}, + {file = "pep8_naming-0.10.0-py2.py3-none-any.whl", hash = "sha256:5d9f1056cb9427ce344e98d1a7f5665710e2f20f748438e308995852cfa24164"}, +] + +[package.dependencies] +flake8-polyfill = ">=1.0.2,<2" + +[[package]] +name = "platformdirs" +version = "3.10.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, + {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + +[[package]] +name = "prospector" +version = "1.10.2" +description = "Prospector is a tool to analyse Python code by aggregating the result of other tools." 
+category = "dev" +optional = false +python-versions = ">=3.7.2,<4.0" +files = [ + {file = "prospector-1.10.2-py3-none-any.whl", hash = "sha256:3bfe103c28bb821cca84926ca31357fbfd32405e4bf8c34ca2e55885684557e4"}, + {file = "prospector-1.10.2.tar.gz", hash = "sha256:cc8f09e79bdd32247edddf05b666940e88ad96338a84f5717b1e8c0678337821"}, +] + +[package.dependencies] +dodgy = ">=0.2.1,<0.3.0" +flake8 = "<6.0.0" +GitPython = ">=3.1.27,<4.0.0" +mccabe = ">=0.7.0,<0.8.0" +packaging = "*" +pep8-naming = ">=0.3.3,<=0.10.0" +pycodestyle = ">=2.9.0" +pydocstyle = ">=2.0.0" +pyflakes = ">=2.2.0,<3" +pylint = ">=2.8.3" +pylint-celery = "0.3" +pylint-django = ">=2.5,<2.6" +pylint-flask = "0.6" +pylint-plugin-utils = ">=0.7,<0.8" +PyYAML = "*" +requirements-detector = ">=1.2.0" +setoptconf-tmp = ">=0.3.1,<0.4.0" +toml = ">=0.10.2,<0.11.0" + +[package.extras] +with-bandit = ["bandit (>=1.5.1)"] +with-everything = ["bandit (>=1.5.1)", "mypy (>=0.600)", "pyright (>=1.1.3)", "pyroma (>=2.4)", "vulture (>=1.5)"] +with-mypy = ["mypy (>=0.600)"] +with-pyright = ["pyright (>=1.1.3)"] +with-pyroma = ["pyroma (>=2.4)"] +with-vulture = ["vulture (>=1.5)"] + [[package]] name = "psycopg2" version = "2.9.7" @@ -1047,6 +1285,18 @@ files = [ {file = "psycopg2-2.9.7.tar.gz", hash = "sha256:f00cc35bd7119f1fed17b85bd1007855194dde2cbd8de01ab8ebb17487440ad8"}, ] +[[package]] +name = "pycodestyle" +version = "2.11.0" +description = "Python style guide checker" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycodestyle-2.11.0-py2.py3-none-any.whl", hash = "sha256:5d1013ba8dc7895b548be5afb05740ca82454fd899971563d2ef625d090326f8"}, + {file = "pycodestyle-2.11.0.tar.gz", hash = "sha256:259bcc17857d8a8b3b4a2327324b79e5f020a13c16074670f9c8c8f872ea76d0"}, +] + [[package]] name = "pydantic" version = "1.10.12" @@ -1100,6 +1350,130 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name = "pydocstyle" +version = "6.3.0" +description = "Python docstring style checker" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019"}, + {file = "pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1"}, +] + +[package.dependencies] +snowballstemmer = ">=2.2.0" + +[package.extras] +toml = ["tomli (>=1.2.3)"] + +[[package]] +name = "pyflakes" +version = "2.5.0" +description = "passive checker of Python programs" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, + {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, +] + +[[package]] +name = "pylint" +version = "2.17.5" +description = "python code static checker" +category = "dev" +optional = false +python-versions = ">=3.7.2" +files = [ + {file = "pylint-2.17.5-py3-none-any.whl", hash = "sha256:73995fb8216d3bed149c8d51bba25b2c52a8251a2c8ac846ec668ce38fab5413"}, + {file = "pylint-2.17.5.tar.gz", hash = "sha256:f7b601cbc06fef7e62a754e2b41294c2aa31f1cb659624b9a85bcba29eaf8252"}, +] + +[package.dependencies] +astroid = ">=2.15.6,<=2.17.0-dev0" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +dill = [ + {version = ">=0.2", markers = "python_version < \"3.11\""}, + 
{version = ">=0.3.6", markers = "python_version >= \"3.11\""}, +] +isort = ">=4.2.5,<6" +mccabe = ">=0.6,<0.8" +platformdirs = ">=2.2.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +tomlkit = ">=0.10.1" + +[package.extras] +spelling = ["pyenchant (>=3.2,<4.0)"] +testutils = ["gitpython (>3)"] + +[[package]] +name = "pylint-celery" +version = "0.3" +description = "pylint-celery is a Pylint plugin to aid Pylint in recognising and understandingerrors caused when using the Celery library" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pylint-celery-0.3.tar.gz", hash = "sha256:41e32094e7408d15c044178ea828dd524beedbdbe6f83f712c5e35bde1de4beb"}, +] + +[package.dependencies] +astroid = ">=1.0" +pylint = ">=1.0" +pylint-plugin-utils = ">=0.2.1" + +[[package]] +name = "pylint-django" +version = "2.5.3" +description = "A Pylint plugin to help Pylint understand the Django web framework" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pylint-django-2.5.3.tar.gz", hash = "sha256:0ac090d106c62fe33782a1d01bda1610b761bb1c9bf5035ced9d5f23a13d8591"}, + {file = "pylint_django-2.5.3-py3-none-any.whl", hash = "sha256:56b12b6adf56d548412445bd35483034394a1a94901c3f8571980a13882299d5"}, +] + +[package.dependencies] +pylint = ">=2.0,<3" +pylint-plugin-utils = ">=0.7" + +[package.extras] +for-tests = ["coverage", "django-tables2", "django-tastypie", "factory-boy", "pylint (>=2.13)", "pytest", "wheel"] +with-django = ["Django"] + +[[package]] +name = "pylint-flask" +version = "0.6" +description = "pylint-flask is a Pylint plugin to aid Pylint in recognizing and understanding errors caused when using Flask" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pylint-flask-0.6.tar.gz", hash = "sha256:f4d97de2216bf7bfce07c9c08b166e978fe9f2725de2a50a9845a97de7e31517"}, +] + +[package.dependencies] +pylint-plugin-utils = ">=0.2.1" + +[[package]] +name = "pylint-plugin-utils" +version = "0.7" +description = "Utilities and helpers for writing Pylint plugins" +category = "dev" +optional = false +python-versions = ">=3.6.2" +files = [ + {file = "pylint-plugin-utils-0.7.tar.gz", hash = "sha256:ce48bc0516ae9415dd5c752c940dfe601b18fe0f48aa249f2386adfa95a004dd"}, + {file = "pylint_plugin_utils-0.7-py3-none-any.whl", hash = "sha256:b3d43e85ab74c4f48bb46ae4ce771e39c3a20f8b3d56982ab17aa73b4f98d535"}, +] + +[package.dependencies] +pylint = ">=1.7" + [[package]] name = "python-dotenv" version = "1.0.0" @@ -1305,6 +1679,51 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requirements-detector" +version = "1.2.2" +description = "Python tool to find and list requirements of a Python project" +category = "dev" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "requirements_detector-1.2.2-py3-none-any.whl", hash = "sha256:d7c60493bf166da3dd59de0e6cb25765e0e32a1931aeae92614034e5786d0bd0"}, + {file = "requirements_detector-1.2.2.tar.gz", hash = "sha256:3642cd7a5b261d79536c36bb7ecacf2adabd902d2e0e42bfb2ba82515da10501"}, +] + +[package.dependencies] +astroid = ">=2.0,<3.0" +packaging = ">=21.3" +semver = ">=3.0.0,<4.0.0" +toml = ">=0.10.2,<0.11.0" + +[[package]] +name = "semver" +version = "3.0.1" +description = "Python helper for Semantic Versioning (https://semver.org)" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "semver-3.0.1-py3-none-any.whl", hash = 
"sha256:2a23844ba1647362c7490fe3995a86e097bb590d16f0f32dfc383008f19e4cdf"}, + {file = "semver-3.0.1.tar.gz", hash = "sha256:9ec78c5447883c67b97f98c3b6212796708191d22e4ad30f4570f840171cbce1"}, +] + +[[package]] +name = "setoptconf-tmp" +version = "0.3.1" +description = "A module for retrieving program settings from various sources in a consistant method." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "setoptconf-tmp-0.3.1.tar.gz", hash = "sha256:e0480addd11347ba52f762f3c4d8afa3e10ad0affbc53e3ffddc0ca5f27d5778"}, + {file = "setoptconf_tmp-0.3.1-py3-none-any.whl", hash = "sha256:76035d5cd1593d38b9056ae12d460eca3aaa34ad05c315b69145e138ba80a745"}, +] + +[package.extras] +yaml = ["pyyaml"] + [[package]] name = "smmap" version = "5.0.0" @@ -1329,6 +1748,18 @@ files = [ {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, ] +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + [[package]] name = "sqlalchemy" version = "2.0.20" @@ -1488,6 +1919,18 @@ requests = ">=2.26.0" [package.extras] blobfile = ["blobfile (>=2)"] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -1500,6 +1943,18 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tomlkit" +version = "0.12.1" +description = "Style preserving TOML library" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, + {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, +] + [[package]] name = "tqdm" version = "4.66.1" @@ -1587,6 +2042,91 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." 
+category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ + {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975"}, + {file = "wrapt-1.15.0-cp310-cp310-win32.whl", hash = "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1"}, + {file = "wrapt-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98"}, + {file = "wrapt-1.15.0-cp311-cp311-win32.whl", hash = "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416"}, + {file = "wrapt-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248"}, + {file = "wrapt-1.15.0-cp35-cp35m-win32.whl", hash = "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559"}, + {file = "wrapt-1.15.0-cp35-cp35m-win_amd64.whl", hash = "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639"}, + {file = "wrapt-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2"}, + {file = "wrapt-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1"}, + {file = "wrapt-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420"}, + {file = "wrapt-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e"}, + {file 
= "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653"}, + {file = "wrapt-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0"}, + {file = "wrapt-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7"}, + {file = "wrapt-1.15.0-cp38-cp38-win32.whl", hash = "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b"}, + {file = "wrapt-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", 
hash = "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9"}, + {file = "wrapt-1.15.0-cp39-cp39-win32.whl", hash = "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff"}, + {file = "wrapt-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6"}, + {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, + {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, +] + [[package]] name = "yarl" version = "1.9.2" @@ -1678,4 +2218,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "87a7b3b9660b3adad9c83f2095febfcb80d6bf6b75c30be1a3f29b1716da9aa2" +content-hash = "0d1a3b7f1022776e878127bb7296ab78ca439dd4b5c46af7bd70e05ba28a9198" diff --git a/module_programming_llm/pyproject.toml b/module_programming_llm/pyproject.toml index 425b47051..e785707cc 100644 --- a/module_programming_llm/pyproject.toml +++ b/module_programming_llm/pyproject.toml @@ -19,6 +19,9 @@ tiktoken = "^0.4.0" [tool.poetry.scripts] module = "athena:run_module" +[tool.poetry.group.dev.dependencies] +prospector = "^1.10.2" + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" From dc9229d82c8f2af3e1d5eaa24d539cde68bd3772 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 21:09:42 +0200 Subject: [PATCH 03/51] add changes --- athena/athena/helpers/programming/code_repository.py | 4 ++-- athena/athena/schemas/programming_exercise.py | 2 +- .../prompts/split_grading_instructions_by_file.py | 2 ++ .../prompts/split_problem_statement_by_file.py | 4 +++- .../split_grading_instructions_by_file.py | 7 +++++++ 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/athena/athena/helpers/programming/code_repository.py b/athena/athena/helpers/programming/code_repository.py index 32c264905..53c6f1595 100644 --- a/athena/athena/helpers/programming/code_repository.py +++ b/athena/athena/helpers/programming/code_repository.py @@ -43,7 +43,7 @@ def get_repository(url: str) -> Repo: repo_zip.extractall(cache_dir_path) if not (cache_dir_path / ".git").exists(): repo = Repo.init(cache_dir_path, initial_branch='main') - repo.index.add(repo.untracked_files) - repo.index.commit("Initial commit") + repo.git.add(all=True, force=True) + repo.git.commit('-m', 'Initial commit') return Repo(cache_dir_path) \ No newline at end of file diff --git a/athena/athena/schemas/programming_exercise.py b/athena/athena/schemas/programming_exercise.py index 0f9d40c44..2ac9610e5 100644 --- a/athena/athena/schemas/programming_exercise.py +++ b/athena/athena/schemas/programming_exercise.py @@ -1,6 +1,6 @@ from pydantic import Field, AnyUrl from zipfile import ZipFile -from git import Repo +from git.repo import Repo from athena.helpers.programming.code_repository import get_repository_zip, get_repository from .exercise_type import ExerciseType diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 
7c0cc685d..833f7ddd6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index a1ee99f9f..6175ec07f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file. +Restructure the problem statement by changed file. """ human_template = """\ @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Problem statement by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index fefa93e14..0982e5c2a 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -5,6 +5,7 @@ from athena.programming import Exercise from athena.storage import store_exercise +from athena.logger import logger from module_programming_llm.config import BasicApproachConfig from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse @@ -44,6 +45,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach file_extension = get_programming_language_file_extension(exercise.programming_language) or "" changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + # logger.info("Exercise: %s", file_extension) + # logger.info("Changed files: %s", changed_files) + # logger.info("Solution repo: %s", solution_repo) + # logger.info("Template repo: %s", template_repo) + # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_grading_instructions_by_file_prompt.system_message, From 3ddb848028aeff091afd22217edb8eda1d60a45a Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 21:30:15 +0200 Subject: [PATCH 04/51] change prompt --- module_programming_llm/module_programming_llm/config.py | 8 ++++---- .../prompts/split_grading_instructions_by_file.py | 9 ++++++--- .../prompts/split_problem_statement_by_file.py | 7 +++++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 173ec1255..0d6cc7e95 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,10 +18,11 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files}**\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** *Note: `changed_files` are the changed files between template and solution repository.*\ """ + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, @@ -30,10 +31,9 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 833f7ddd6..2f181609f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,15 +1,18 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file. +Restructure the grading instructions by changed file to make it simpler. 
""" human_template = """\ Grading instructions: {grading_instructions} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 6175ec07f..c06aea30e 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -8,8 +8,11 @@ Problem statement: {problem_statement} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Problem statement by file: """ \ No newline at end of file From 7cb7c44ce9956f5875fcd83ef1dac5ba9a18f264 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 22:20:06 +0200 Subject: [PATCH 05/51] update split problem statements and grading instructions --- .../module_programming_llm/__main__.py | 8 +- .../split_grading_instructions_by_file.py | 99 ++++++++++--------- .../split_problem_statement_by_file.py | 97 +++++++++--------- 3 files changed, 103 insertions(+), 101 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index e4fb070fe..786d9d824 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -8,18 +8,12 @@ from module_programming_llm.config import Configuration from module_programming_llm.generate_suggestions_by_file import generate_suggestions_by_file -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed @submissions_consumer -def receive_submissions(exercise: Exercise, submissions: List[Submission], module_config: Configuration): +def receive_submissions(exercise: Exercise, submissions: List[Submission]): logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id) - # Split problem statements and grading instructions for later - generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - @submission_selector def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission: diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0982e5c2a..8d45b597f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,16 +3,16 @@ from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import 
store_exercise -from athena.logger import logger +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_GRADING_INSTRUCTIONS_KEY = "file_grading_instructions" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileGradingInstruction(BaseModel): @@ -22,34 +22,50 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") -def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: +async def split_grading_instructions_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitGradingInstructions]: """Split the general grading instructions by file Args: - exercise (Exercise): Exercise to split the grading instructions for + exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) + submission (Submission): Submission to split the grading instructions for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitGradingInstructions: Grading instructions split by file, empty if input was too long + Optional[SplitGradingInstructions]: Split grading instructions, None if it is too short or too long """ - if exercise.grading_instructions is None or exercise.grading_instructions.strip() == "": - return SplitGradingInstructions(instructions=[]) + + # Return None if the grading instructions are too short + if (exercise.grading_instructions is None + or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + return None model = config.model.get_model() - solution_repo = exercise.get_solution_repository() template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - # logger.info("Exercise: %s", file_extension) - # logger.info("Changed files: %s", changed_files) - # logger.info("Solution repo: %s", solution_repo) - # logger.info("Template repo: %s", template_repo) - # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + 
).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -60,15 +76,13 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach prompt_input = { "grading_instructions": exercise.grading_instructions, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitGradingInstructions object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitGradingInstructions(instructions=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_grading_instructions = predict_and_parse( model=model, @@ -78,25 +92,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach ) if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_data", split_grading_instructions.dict()) - - return split_grading_instructions + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_grading_instructions.dict() + }) + if not split_grading_instructions.file_grading_instructions: + return None -def generate_and_store_split_grading_instructions_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: - """Generate and store the split grading instructions if needed - - Args: - exercise (Exercise): Exercise to get the split grading instructions for - config (BasicApproachConfig): Configuration - - Returns: - SplitGradingInstructions: Grading instructions split by file - """ - if FILE_GRADING_INSTRUCTIONS_KEY in exercise.meta: - return SplitGradingInstructions.parse_obj(exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY]) - - split_grading_instructions = split_grading_instructions_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY] = split_grading_instructions.dict() - store_exercise(exercise) return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 8329e5749..1000ad245 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,17 +1,18 @@ -from typing import Sequence +from typing import Optional, Sequence from athena import emit_meta from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import store_exercise +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_PROBLEM_STATEMETS_KEY = "file_problem_statements" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + 
predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileProblemStatement(BaseModel): @@ -21,28 +22,49 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") -def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: +async def split_problem_statement_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitProblemStatement]: """Split the general problem statement by file Args: - exercise (Exercise): Exercise to split the problem statement for + exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) + submission (Submission): Submission to split the problem statement for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitProblemStatement: Problem statement split by file, empty if input was too long + Optional[SplitProblemStatement]: Split problem statement, None if it is too short or too long """ - if exercise.problem_statement.strip() == "": - return SplitProblemStatement(problem_statements=[]) + # Return None if the problem statement is too short + if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: + return None + model = config.model.get_model() - - solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -50,18 +72,16 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon human_message=config.split_problem_statement_by_file_prompt.system_message, pydantic_object=SplitProblemStatement ) - + prompt_input = { "problem_statement": exercise.problem_statement, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitProblemStatement object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitProblemStatement(problem_statements=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_problem_statement = 
predict_and_parse( model=model, @@ -71,25 +91,12 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon ) if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_data", split_problem_statement.dict()) + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_problem_statement.dict() + }) - return split_problem_statement - - -def generate_and_store_split_problem_statement_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: - """Generate and store the split problem statement if needed - - Args: - exercise (Exercise): Exercise to split the problem statement for - config (BasicApproachConfig): Configuration - - Returns: - SplitProblemStatement: Problem statement split by file - """ - if FILE_PROBLEM_STATEMETS_KEY in exercise.meta: - return SplitProblemStatement.parse_obj(exercise.meta[FILE_PROBLEM_STATEMETS_KEY]) + if not split_problem_statement.file_problem_statements: + return None - split_problem_statement = split_problem_statement_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_PROBLEM_STATEMETS_KEY] = split_problem_statement.dict() - store_exercise(exercise) return split_problem_statement From cf76e06f521ae3bc771a3aac6967f606422ed67c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 23:46:04 +0200 Subject: [PATCH 06/51] refactor generator --- .../module_programming_llm/config.py | 5 +- .../generate_suggestions_by_file.py | 246 +++++++++++------- .../helpers/llm_utils.py | 21 +- .../module_programming_llm/helpers/utils.py | 2 +- .../prompts/generate_suggestions_by_file.py | 4 +- .../split_grading_instructions_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 7 files changed, 157 insertions(+), 125 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 0d6cc7e95..3507eded0 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -43,9 +43,9 @@ class SplitGradingInstructionsByFilePrompt(BaseModel): class GenerationPrompt(BaseModel): """\ Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, \ -**{submission}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}** +**{submission_file}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{template_to_solution_diff}** -*Note: Prompt will be applied per file independently, submission is a single file.*\ +*Note: Prompt will be applied per file independently. Also, you don't have to include all features, e.g. 
template_to_solution_diff.*\ """ system_message: str = Field(default=generate_suggestions_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -58,6 +58,7 @@ class BasicApproachConfig(BaseModel): max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_number_of_files: int = Field(default=25, description="Maximum number of files.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index bc93269eb..ad8daf47e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,17 +1,25 @@ from typing import List, Optional, Sequence - +import asyncio from pydantic import BaseModel, Field -from langchain.chains.openai_functions import create_structured_output_chain from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from athena.logger import logger from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed -from module_programming_llm.helpers.llm_utils import check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers +from module_programming_llm.split_grading_instructions_by_file import split_grading_instructions_by_file +from module_programming_llm.split_problem_statement_by_file import split_problem_statement_by_file +from module_programming_llm.helpers.llm_utils import ( + check_prompt_length_and_omit_features_if_necessary, + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + predict_and_parse, +) +from module_programming_llm.helpers.utils import( + get_diff, + load_files_from_repo, + add_line_numbers, + get_programming_language_file_extension +) class FeedbackModel(BaseModel): @@ -38,13 +46,26 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() - # Get split grading instructions - split_grading_instructions = generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=config, debug=debug) - file_grading_instructions = { item.file_name: item.grading_instructions for item in split_grading_instructions.instructions } + # Get split problem statement and grading instructions by file (if necessary) + split_problem_statement, split_grading_instructions = await asyncio.gather( + split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), + 
split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + ) + + is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split + file_problem_statements = { + item.file_name: item.problem_statement + for item in split_problem_statement.file_problem_statements + } if split_problem_statement is not None else {} - # Get split problem statement - split_problem_statement = generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=config, debug=debug) - file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.problem_statements } + is_short_grading_instructions = ( + num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split + if exercise.grading_instructions is not None else True + ) + file_grading_instructions = { + item.file_name: item.grading_instructions + for item in split_grading_instructions.file_grading_instructions + } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] @@ -53,33 +74,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio template_repo = exercise.get_template_repository() submission_repo = submission.get_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) - if file_extension is None: - raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - - files = load_files_from_repo( + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") + + # Changed text files + changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x.endswith(file_extension) if file_extension else False + file_filter=lambda x: x in changed_files_from_template_to_submission ) - for file_path, content in files.items(): - if content is None: - continue - - problem_statement = file_problem_statements.get(file_path, "No relevant problem statement section found.") - grading_instructions = file_grading_instructions.get(file_path, "No relevant grading instructions found.") - - content = add_line_numbers(content) - solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) - template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) + for file_path, file_content in changed_files.items(): + problem_statement = ( + exercise.problem_statement if is_short_problem_statement + else file_problem_statements.get(file_path, "No relevant problem statement section found.") + ) + problem_statement = problem_statement if problem_statement.strip() else "No problem statement found." + + grading_instructions = ( + exercise.grading_instructions or "" if is_short_grading_instructions + else file_grading_instructions.get(file_path, "No relevant grading instructions found.") + ) + grading_instructions = grading_instructions if grading_instructions.strip() else "No grading instructions found." 
+ + file_content = add_line_numbers(file_content) + solution_to_submission_diff = get_diff( + src_repo=solution_repo, + dst_repo=submission_repo, + src_prefix="solution", + dst_prefix="submission", + file_path=file_path + ) + template_to_submission_diff = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + src_prefix="template", + dst_prefix="submission", + file_path=file_path + ) + template_to_solution_diff = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + src_prefix="template", + dst_prefix="solution", + file_path=file_path + ) prompt_inputs.append({ "file_path": file_path, - "submission": content, + "priority": len(template_to_solution_diff), + "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, "solution_to_submission_diff": solution_to_submission_diff, "template_to_submission_diff": template_to_submission_diff, + "template_to_solution_diff": template_to_solution_diff, "grading_instructions": grading_instructions, "problem_statement": problem_statement, }) @@ -93,11 +145,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio # Filter long prompts (omitting features if necessary) omittable_features = [ + "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", + "solution_to_submission_diff", "template_to_submission_diff", - "solution_to_submission_diff" ] + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it + prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in [check_prompt_length_and_omit_features_if_necessary( @@ -110,70 +166,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio if should_run ] - chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) - if not prompt_inputs: - return [] - result = await chain.agenerate(prompt_inputs) - - logger.info("Generated result: %s ", result) - - return [] - # return predict_and_parse( - # model=model, - # chat_prompt=chat_prompt, - # prompt_input={ - # "grading_instructions": exercise.grading_instructions, - # "changed_files": changed_files - # }, - # pydantic_object=SplitGradingInstructions - # ) - - - - - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# 
id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file + # If we have many files we need to filter and prioritize them + if len(prompt_inputs) > config.max_number_of_files: + programming_language_extension = get_programming_language_file_extension(programming_language=exercise.programming_language) + + # Prioritize files that have a diff between solution and submission + prompt_inputs = sorted( + prompt_inputs, + key=lambda x: x["priority"], + reverse=True + ) + + filtered_prompt_inputs = [] + if programming_language_extension is not None: + filtered_prompt_inputs = [ + prompt_input + for prompt_input in prompt_inputs + if prompt_input["file_path"].endswith(programming_language_extension) + ] + + while len(filtered_prompt_inputs) < config.max_number_of_files and prompt_inputs: + filtered_prompt_inputs.append(prompt_inputs.pop(0)) + prompt_inputs = filtered_prompt_inputs + + results: List[AssessmentModel] = await asyncio.gather(*[ + predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=AssessmentModel + ) for prompt_input in prompt_inputs + ]) + + if debug: + emit_meta( + "generate_suggestions", [ + { + "file_path": prompt_input["file_path"], + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() + } + for prompt_input, result in zip(prompt_inputs, results) + ] + ) + + feedbacks: List[Feedback] = [] + for prompt_input, result in zip(prompt_inputs, results): + file_path = prompt_input["file_path"] + for feedback in result.feedbacks: + feedbacks.append(Feedback( + exercise_id=exercise.id, + submission_id=submission.id, + title=feedback.title, + description=feedback.description, + file_path=file_path, + line_start=feedback.line_start, + line_end=feedback.line_end, + credits=feedback.credits, + meta={} + )) + + return feedbacks diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 53a300f00..b59ca9dab 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): """Predicts and parses the output of the model Args: @@ -131,22 +131,3 @@ def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, chain = LLMChain(llm=model, prompt=chat_prompt) output = chain.run(**prompt_input) return output_parser.parse(output) - - -async def agenerate_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): - """Generates and parses the output of the model - - Args: - model (BaseLanguageModel): The model to generate with - chat_prompt (ChatPromptTemplate): Prompt to use - prompt_input (dict): Input parameters to use for the prompt - pydantic_object (Type[T]): Pydantic model to parse the output - """ - if supports_function_calling(model): - 
chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/utils.py b/module_programming_llm/module_programming_llm/helpers/utils.py index 8f6c67ab8..894f4a6a6 100644 --- a/module_programming_llm/module_programming_llm/helpers/utils.py +++ b/module_programming_llm/module_programming_llm/helpers/utils.py @@ -9,7 +9,7 @@ from langchain.document_loaders import GitLoader -def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, Optional[str]]: +def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, str]: return { doc.metadata['file_path']: doc.page_content for doc in GitLoader(repo_path=str(repo.working_tree_dir), file_filter=file_filter).load() diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index f1a300ae0..2e6b27059 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -16,8 +16,8 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission to grade (with line numbers : ): -{submission} +Student\'s submission file to grade (with line numbers : ): +{submission_file} Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 8d45b597f..cb1f4a29f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -84,7 +84,7 @@ async def split_grading_instructions_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_grading_instructions = predict_and_parse( + split_grading_instructions = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1000ad245..28df233bf 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -83,7 +83,7 @@ async def split_problem_statement_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_problem_statement = predict_and_parse( + split_problem_statement = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, From 6ac924f45b2af46e53849dde48e54dfd76ff8fed Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 23:46:16 +0200 Subject: [PATCH 07/51] fix spacing --- .../module_programming_llm/generate_suggestions_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index ad8daf47e..22240d708 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -14,7 +14,7 @@ num_tokens_from_string, predict_and_parse, ) -from module_programming_llm.helpers.utils import( +from module_programming_llm.helpers.utils import ( get_diff, load_files_from_repo, add_line_numbers, From 54a9823e2c5941f66f9054c6ba153022ee13d2d8 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 00:06:29 +0200 Subject: [PATCH 08/51] fix stuff --- .../module_programming_llm/generate_suggestions_by_file.py | 7 ++++++- .../prompts/generate_suggestions_by_file.py | 3 --- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 22240d708..93dc67564 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,4 +1,5 @@ from typing import List, Optional, Sequence +import os import asyncio from pydantic import BaseModel, Field @@ -80,11 +81,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio file_path=None, name_only=True ).split("\n") + changed_files_from_template_to_submission = [ + os.path.join(str(submission_repo.working_tree_dir or ""), file_path) + for file_path in changed_files_from_template_to_submission + ] # Changed text files changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x in changed_files_from_template_to_submission + file_filter=lambda file_path: file_path in changed_files_from_template_to_submission ) for file_path, file_content in changed_files.items(): diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 2e6b27059..344d635d0 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -9,9 +9,6 @@ Problem statement: {problem_statement} -Example solution: -{example_solution} - Grading instructions: {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} From f53feb98b9450afebe3bbc6843a8794d951f84c8 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:41:39 +0200 Subject: [PATCH 09/51] add fixes --- module_programming_llm/module_programming_llm/config.py | 7 +++---- .../prompts/split_problem_statement_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3507eded0..3d9225a1f 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -22,23 +22,22 @@ class SplitProblemStatementsByFilePrompt(BaseModel): *Note: `changed_files` are the changed files between template and solution repository.*\ """ - tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, description="Message from a human. The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") + class SplitGradingInstructionsByFilePrompt(BaseModel): """\ Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ - tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, description="Message from a human. 
The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") class GenerationPrompt(BaseModel): """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index c06aea30e..b92710c07 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Problem statement by file: diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 28df233bf..1cb35f7c8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -69,7 +69,7 @@ async def split_problem_statement_by_file( chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_problem_statement_by_file_prompt.system_message, - human_message=config.split_problem_statement_by_file_prompt.system_message, + human_message=config.split_problem_statement_by_file_prompt.human_message, pydantic_object=SplitProblemStatement ) From 480f5256b5ecf6a6db1e1fc44e24a88af3869009 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:51:13 +0200 Subject: [PATCH 10/51] more fixes --- .../generate_suggestions_by_file.py | 19 ++++++++++--------- .../split_grading_instructions_by_file.py | 4 ++-- .../split_problem_statement_by_file.py | 2 +- .../split_grading_instructions_by_file.py | 12 ++++++++++-- .../split_problem_statement_by_file.py | 8 ++++++++ 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 93dc67564..732ab557b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -47,10 +47,18 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_by_file_prompt.system_message, + human_message=config.generate_suggestions_by_file_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( - split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), - split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), + split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) ) is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split @@ -141,13 +149,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio "problem_statement": problem_statement, }) - chat_prompt = get_chat_prompt_with_formatting_instructions( - model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, - pydantic_object=AssessmentModel - ) - # Filter long prompts (omitting features if necessary) omittable_features = [ "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 2f181609f..54f2872f6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file to make it simpler. +Restructure the grading instructions by student changed file to make it simpler. 
""" human_template = """\ @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Grading instructions by file: diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index b92710c07..397e34893 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the problem statement by changed file. +Restructure the problem statement by student changed file to make it simpler. """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index cb1f4a29f..97903d809 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitGradingInstructions(BaseModel): async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitGradingInstructions]: @@ -36,6 +39,7 @@ async def split_grading_instructions_by_file( Args: exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) submission (Submission): Submission to split the grading instructions for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for grading_instructions config (BasicApproachConfig): Configuration Returns: @@ -44,7 +48,11 @@ async def split_grading_instructions_by_file( # Return None if the grading instructions are too short if (exercise.grading_instructions is None - or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + or num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split): + return None + + # Return None if the grading instructions are not in the prompt + if "grading_instructions" not in prompt.input_variables: return None model = config.model.get_model() @@ -92,7 +100,7 @@ async def split_grading_instructions_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), "result": split_grading_instructions.dict() }) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1cb35f7c8..4790f8ce8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ 
b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitProblemStatement(BaseModel): async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitProblemStatement]: @@ -36,6 +39,7 @@ async def split_problem_statement_by_file( Args: exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) submission (Submission): Submission to split the problem statement for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for problem_statement config (BasicApproachConfig): Configuration Returns: @@ -45,6 +49,10 @@ async def split_problem_statement_by_file( # Return None if the problem statement is too short if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: return None + + # Return None if the problem statement not in the prompt + if "problem_statement" not in prompt.input_variables: + return None model = config.model.get_model() From c94d4566de8b058f90c13fdb349dad8bb32de3ca Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 09:51:29 +0200 Subject: [PATCH 11/51] remove empty line --- .../module_programming_llm/generate_suggestions_by_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 732ab557b..c3cd1201e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -54,7 +54,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio pydantic_object=AssessmentModel ) - # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), From 62b6929224ce09dc9ffa513a8d293d0b13adcfbb Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:53:30 +0200 Subject: [PATCH 12/51] delete unuse --- .../basic/basic_feedback_provider.py | 110 ------------------ 1 file changed, 110 deletions(-) delete mode 100644 module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py diff --git a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py deleted file mode 100644 index 134fd0e41..000000000 --- a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py +++ /dev/null @@ -1,110 +0,0 @@ -# import json -# from typing import List - -# from langchain.chains import LLMChain -# from langchain.prompts import ( -# ChatPromptTemplate, -# SystemMessagePromptTemplate, -# HumanMessagePromptTemplate, -# ) - -# from athena.programming import Exercise, Submission, Feedback -# from athena.logger import logger - -# from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers -# from module_programming_llm.helpers.models import chat - -# from ..prompts.basic_feedback_provider import system_template, human_template - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: -# max_prompt_length = 2560 -# input_list: List[dict] = [] - -# if exercise.meta['file_grading_instructions'] is None: -# raise ValueError("No file grading instructions found for exercise in meta.") -# if exercise.meta['file_problem_statements'] is None: -# raise ValueError("No file problem statements found for exercise in meta.") - -# # Feature extraction -# solution_repo = exercise.get_solution_repository() -# template_repo = exercise.get_template_repository() -# submission_repo = submission.get_repository() - -# file_extension = get_programming_language_file_extension(exercise.programming_language) -# if file_extension is None: -# raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - -# for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): -# if submission_content is None: -# continue - -# problem_statement = exercise.meta['file_problem_statements'].get(file_path) -# if problem_statement is None: -# logger.info("No problem statement for %s, skipping.", file_path) -# continue - -# grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) -# if grading_instructions is None: -# logger.info("No grading instructions for %s, skipping.", file_path) -# continue - -# submission_content = add_line_numbers(submission_content) -# solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) -# template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) - -# input_list.append({ -# "file_path": file_path, -# "submission_content": submission_content, -# "solution_to_submission_diff": solution_to_submission_diff, -# "template_to_submission_diff": template_to_submission_diff, -# "grading_instructions": grading_instructions, -# "problem_statement": problem_statement, -# }) - -# system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) -# human_message_prompt = 
HumanMessagePromptTemplate.from_template(human_template) -# chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file From 4533d99eaaca22a5593bf7a63e57fa3e41034c2c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:09:08 +0200 Subject: [PATCH 13/51] add small changes --- .../module_programming_llm/__main__.py | 1 + .../module_programming_llm/config.py | 13 +++++++------ .../generate_suggestions_by_file.py | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index 786d9d824..556313bf6 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -33,5 +33,6 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, module_co if __name__ == "__main__": + # Preload for token estimation later tiktoken.get_encoding("cl100k_base") app.start() diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3d9225a1f..743ddf693 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,9 +18,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -31,7 +29,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** +Features available: 
**{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") @@ -53,11 +51,14 @@ class GenerationPrompt(BaseModel): class BasicApproachConfig(BaseModel): - """This approach uses a LLM with a single prompt to generate feedback in a single step.""" + """\ +This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ +Then, it generates suggestions for each file independently.\ +""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - max_number_of_files: int = Field(default=25, description="Maximum number of files.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index c3cd1201e..179881f1b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -136,8 +136,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) prompt_inputs.append({ - "file_path": file_path, - "priority": len(template_to_solution_diff), + "file_path": file_path, # Not really relevant for the prompt + "priority": len(template_to_solution_diff), # Not really relevant for the prompt "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, @@ -149,15 +149,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio }) # Filter long prompts (omitting features if necessary) + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it omittable_features = [ - "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) + "template_to_solution_diff", # If it is even included in the prompt (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", "solution_to_submission_diff", - "template_to_submission_diff", + "template_to_submission_diff", # In the future we might indicate the changed lines in the submission_file additionally ] - # "submission_file" is not omittable, because it is the main input containing the line numbers - # In the future we might be able to include the line numbers in the diff, but for now we need to keep it prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in From 6bf2459eebea6deda3c17069384cfe2c150b9012 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:10:47 +0200 Subject: [PATCH 14/51] typo --- .../module_programming_llm/split_problem_statement_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 4790f8ce8..f2c1f0f1f 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -99,7 +99,7 @@ async def split_problem_statement_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), "result": split_problem_statement.dict() }) From 3b9d0d17e0b73bb41c3a30de899f01646cee8895 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:30:33 +0200 Subject: [PATCH 15/51] add more fixes --- .../split_grading_instructions_by_file.py | 5 ++--- .../split_problem_statement_by_file.py | 5 ++--- .../split_grading_instructions_by_file.py | 20 +++++++++++++++++-- .../split_problem_statement_by_file.py | 20 +++++++++++++++++-- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 54f2872f6..21b754846 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the grading instructions by student changed file to make it simpler. +Your task is to restructure the grading instructions by student changed file to show a tutor \ +relevant instructions for each file. This should make it easier for the tutor to grade the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 397e34893..95dde8787 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the problem statement by student changed file to make it simpler. +Your task is to restructure the problem statement by student changed file to show the student \ +relevant information for each file. 
This should make it easier for the student to solve the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 97903d809..a565b7e0c 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -108,4 +108,20 @@ async def split_grading_instructions_by_file( if not split_grading_instructions.file_grading_instructions: return None + # Join duplicate file names (some responses contain multiple grading instructions for the same file) + file_grading_instructions_by_file_name = defaultdict(list) + for file_grading_instruction in split_grading_instructions.file_grading_instructions: + file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) + + split_grading_instructions.file_grading_instructions = [ + FileGradingInstruction( + file_name=file_name, + grading_instructions="\n".join( + file_grading_instruction.grading_instructions + for file_grading_instruction in file_grading_instructions + ) + ) + for file_name, file_grading_instructions in file_grading_instructions_by_file_name.items() + ] + return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index f2c1f0f1f..ccfc3533a 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -107,4 +107,20 @@ async def split_problem_statement_by_file( if not split_problem_statement.file_problem_statements: return None + # Join duplicate file names (some responses contain multiple problem statements for the same file) + file_problem_statements_by_file_name = defaultdict(list) + for file_problem_statement in split_problem_statement.file_problem_statements: + file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) + + split_problem_statement.file_problem_statements = [ + FileProblemStatement( + file_name=file_name, + problem_statement="\n".join( + file_problem_statement.problem_statement + for file_problem_statement in file_problem_statements + ) + ) + for file_name, file_problem_statements in file_problem_statements_by_file_name.items() + ] + return split_problem_statement From f14f00c1e62ddc9c1874d8afffa6a5e38eac4b07 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:36:18 +0200 Subject: [PATCH 16/51] fix pydantic --- .../module_programming_llm/split_grading_instructions_by_file.py | 1 + .../module_programming_llm/split_problem_statement_by_file.py | 1 + 2 files changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index a565b7e0c..0b1a2a615 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -27,6 +27,7 @@ class SplitGradingInstructions(BaseModel): file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") +# pylint: disable=too-many-locals async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index ccfc3533a..cc72cd3f6 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -27,6 +27,7 @@ class SplitProblemStatement(BaseModel): file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") +# pylint: disable=too-many-locals async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, From 814eb2ba00266a8f5ee9630a8e0408df8595cca0 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 11:56:38 +0200 Subject: [PATCH 17/51] small improvements --- .../generate_suggestions_by_file.py | 4 ++-- .../split_grading_instructions_by_file.py | 8 ++++---- .../split_problem_statement_by_file.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 179881f1b..e4d533b21 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -63,7 +63,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split file_problem_statements = { item.file_name: item.problem_statement - for item in split_problem_statement.file_problem_statements + for item in split_problem_statement.items } if split_problem_statement is not None else {} is_short_grading_instructions = ( @@ -72,7 +72,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) file_grading_instructions = { item.file_name: item.grading_instructions - for item in split_grading_instructions.file_grading_instructions + for item in split_grading_instructions.items } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0b1a2a615..388d7a865 100644 --- 
a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -24,7 +24,7 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + items: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") # pylint: disable=too-many-locals @@ -106,15 +106,15 @@ async def split_grading_instructions_by_file( "result": split_grading_instructions.dict() }) - if not split_grading_instructions.file_grading_instructions: + if not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) file_grading_instructions_by_file_name = defaultdict(list) - for file_grading_instruction in split_grading_instructions.file_grading_instructions: + for file_grading_instruction in split_grading_instructions.items: file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) - split_grading_instructions.file_grading_instructions = [ + split_grading_instructions.items = [ FileGradingInstruction( file_name=file_name, grading_instructions="\n".join( diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index cc72cd3f6..d423a8a48 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -24,7 +24,7 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + items: Sequence[FileProblemStatement] = Field(..., description="File problem statements") # pylint: disable=too-many-locals @@ -105,15 +105,15 @@ async def split_problem_statement_by_file( "result": split_problem_statement.dict() }) - if not split_problem_statement.file_problem_statements: + if not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) file_problem_statements_by_file_name = defaultdict(list) - for file_problem_statement in split_problem_statement.file_problem_statements: + for file_problem_statement in split_problem_statement.items: file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) - split_problem_statement.file_problem_statements = [ + split_problem_statement.items = [ FileProblemStatement( file_name=file_name, problem_statement="\n".join( From 80658eb4c65b430908b6692782fb6bdd9bf70ad9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 12:45:42 +0200 Subject: [PATCH 18/51] add final fixes --- .../generate_suggestions_by_file.py | 4 +-- .../helpers/llm_utils.py | 33 ++++++++++++------- .../prompts/generate_suggestions_by_file.py | 16 ++++----- .../split_grading_instructions_by_file.py | 4 +-- .../split_problem_statement_by_file.py | 4 +-- 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index e4d533b21..152fb6660 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -194,7 +194,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio filtered_prompt_inputs.append(prompt_inputs.pop(0)) prompt_inputs = filtered_prompt_inputs - results: List[AssessmentModel] = await asyncio.gather(*[ + results: List[Optional[AssessmentModel]] = await asyncio.gather(*[ predict_and_parse( model=model, chat_prompt=chat_prompt, @@ -209,7 +209,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio { "file_path": prompt_input["file_path"], "prompt": chat_prompt.format(**prompt_input), - "result": result.dict() + "result": result.dict() if result is not None else None } for prompt_input, result in zip(prompt_inputs, results) ] diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index b59ca9dab..394bdd2f7 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -1,6 +1,5 @@ -from typing import Type, TypeVar, List -from pydantic import BaseModel - +from typing import Optional, Type, TypeVar, List +from pydantic import BaseModel, ValidationError import tiktoken from langchain.chains import LLMChain @@ -11,8 +10,9 @@ SystemMessagePromptTemplate, HumanMessagePromptTemplate, ) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.chains.openai_functions import create_structured_output_chain +from langchain.output_parsers import PydanticOutputParser +from langchain.schema import OutputParserException from athena import emit_meta @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: """Predicts and parses the output of the model Args: @@ -122,12 +122,23 @@ async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTem chat_prompt (ChatPromptTemplate): Prompt to use prompt_input (dict): Input parameters to use for the prompt pydantic_object (Type[T]): Pydantic model to parse the output + + Returns: + Optional[T]: Parsed output, or None if it could not be parsed """ if supports_function_calling(model): chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - 
chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) + + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser) + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 344d635d0..7535fd244 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -1,11 +1,6 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -VERY IMPORTANT: Effective feedback for text assignments should be: -1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual\ -""" - -human_template = """\ Problem statement: {problem_statement} @@ -13,12 +8,17 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission file to grade (with line numbers : ): -{submission_file} - Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} +VERY IMPORTANT: Effective feedback for text assignments should be: +1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. 
Contextual\ +""" + +human_template = """\ Diff between template (deletions) and student\'s submission (additions): {template_to_submission_diff} + +Student\'s submission file to grade (with line numbers : ): +{submission_file} """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 388d7a865..4ba10cb54 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -103,10 +103,10 @@ async def split_grading_instructions_by_file( if debug: emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), - "result": split_grading_instructions.dict() + "result": split_grading_instructions.dict() if split_grading_instructions is not None else None }) - if not split_grading_instructions.items: + if split_grading_instructions is None or not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index d423a8a48..4a4761610 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -102,10 +102,10 @@ async def split_problem_statement_by_file( if debug: emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), - "result": split_problem_statement.dict() + "result": split_problem_statement.dict() if split_problem_statement is not None else None }) - if not split_problem_statement.items: + if split_problem_statement is None or not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) From 0da1404fcb4fa0bd94faba2ce68972b205937d4e Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 14:00:44 +0200 Subject: [PATCH 19/51] fix missing env variables --- env_example/module_text_llm.env | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/env_example/module_text_llm.env b/env_example/module_text_llm.env index d1772bb30..c62a54bf4 100644 --- a/env_example/module_text_llm.env +++ b/env_example/module_text_llm.env @@ -17,7 +17,18 @@ LLM_DEFAULT_MODEL="azure_openai_gpt-35" LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # Azure OpenAI [leave blank if not used] -# Model names prefixed with `azure_openai_` followed by the azure deployment id, e.g. `azure_openai_gpt-35` +# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. 
`azure_openai_gpt-35` LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed -LLM_AZURE_OPENAI_API_VERSION="2023-05-15" # change base if needed +LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed + +# Replicate [leave blank if not used] +# See https://replicate.com and adjust model config options in `module_text_llm/helpers/models/replicate.py` +REPLICATE_API_TOKEN= + +# LangSmith (can be used for tracing LLMs) [leave blank if not used] +# See https://docs.smith.langchain.com +# LANGCHAIN_TRACING_V2=true +# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +# LANGCHAIN_API_KEY="XXX" +# LANGCHAIN_PROJECT="XXX" \ No newline at end of file From 2bad6c019a03a4853240a358e6b1f53436973c42 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 21:09:42 +0200 Subject: [PATCH 20/51] add changes --- athena/athena/helpers/programming/code_repository.py | 4 ++-- athena/athena/schemas/programming_exercise.py | 2 +- .../prompts/split_grading_instructions_by_file.py | 2 ++ .../prompts/split_problem_statement_by_file.py | 4 +++- .../split_grading_instructions_by_file.py | 7 +++++++ 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/athena/athena/helpers/programming/code_repository.py b/athena/athena/helpers/programming/code_repository.py index 32c264905..53c6f1595 100644 --- a/athena/athena/helpers/programming/code_repository.py +++ b/athena/athena/helpers/programming/code_repository.py @@ -43,7 +43,7 @@ def get_repository(url: str) -> Repo: repo_zip.extractall(cache_dir_path) if not (cache_dir_path / ".git").exists(): repo = Repo.init(cache_dir_path, initial_branch='main') - repo.index.add(repo.untracked_files) - repo.index.commit("Initial commit") + repo.git.add(all=True, force=True) + repo.git.commit('-m', 'Initial commit') return Repo(cache_dir_path) \ No newline at end of file diff --git a/athena/athena/schemas/programming_exercise.py b/athena/athena/schemas/programming_exercise.py index 0f9d40c44..2ac9610e5 100644 --- a/athena/athena/schemas/programming_exercise.py +++ b/athena/athena/schemas/programming_exercise.py @@ -1,6 +1,6 @@ from pydantic import Field, AnyUrl from zipfile import ZipFile -from git import Repo +from git.repo import Repo from athena.helpers.programming.code_repository import get_repository_zip, get_repository from .exercise_type import ExerciseType diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 7c0cc685d..833f7ddd6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index a1ee99f9f..6175ec07f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming 
assignments. -Restructure the grading instructions by changed file. +Restructure the problem statement by changed file. """ human_template = """\ @@ -10,4 +10,6 @@ Changed files: {changed_files} + +Problem statement by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index fefa93e14..0982e5c2a 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -5,6 +5,7 @@ from athena.programming import Exercise from athena.storage import store_exercise +from athena.logger import logger from module_programming_llm.config import BasicApproachConfig from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse @@ -44,6 +45,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach file_extension = get_programming_language_file_extension(exercise.programming_language) or "" changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + # logger.info("Exercise: %s", file_extension) + # logger.info("Changed files: %s", changed_files) + # logger.info("Solution repo: %s", solution_repo) + # logger.info("Template repo: %s", template_repo) + # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_grading_instructions_by_file_prompt.system_message, From 46a3cc3a07b19674c1b8cc5237d7a598df06a627 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 21:30:15 +0200 Subject: [PATCH 21/51] change prompt --- module_programming_llm/module_programming_llm/config.py | 8 ++++---- .../prompts/split_grading_instructions_by_file.py | 9 ++++++--- .../prompts/split_problem_statement_by_file.py | 7 +++++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 173ec1255..0d6cc7e95 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,10 +18,11 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files}**\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** *Note: `changed_files` are the changed files between template and solution repository.*\ """ + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, @@ -30,10 +31,9 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 833f7ddd6..2f181609f 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,15 +1,18 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file. +Restructure the grading instructions by changed file to make it simpler. 
""" human_template = """\ Grading instructions: {grading_instructions} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Grading instructions by file: """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 6175ec07f..c06aea30e 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -8,8 +8,11 @@ Problem statement: {problem_statement} -Changed files: -{changed_files} +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission: +{changed_files_from_template_to_submission} Problem statement by file: """ \ No newline at end of file From 9be4514bd63c715be5fa2960001efffcef751aef Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 22:20:06 +0200 Subject: [PATCH 22/51] update split problem statements and grading instructions --- .../module_programming_llm/__main__.py | 8 +- .../split_grading_instructions_by_file.py | 99 ++++++++++--------- .../split_problem_statement_by_file.py | 97 +++++++++--------- 3 files changed, 103 insertions(+), 101 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index e4fb070fe..786d9d824 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -8,18 +8,12 @@ from module_programming_llm.config import Configuration from module_programming_llm.generate_suggestions_by_file import generate_suggestions_by_file -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed @submissions_consumer -def receive_submissions(exercise: Exercise, submissions: List[Submission], module_config: Configuration): +def receive_submissions(exercise: Exercise, submissions: List[Submission]): logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id) - # Split problem statements and grading instructions for later - generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=module_config.approach, debug=module_config.debug) - @submission_selector def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission: diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0982e5c2a..8d45b597f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,16 +3,16 @@ from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import 
store_exercise -from athena.logger import logger +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_GRADING_INSTRUCTIONS_KEY = "file_grading_instructions" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileGradingInstruction(BaseModel): @@ -22,34 +22,50 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") -def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: +async def split_grading_instructions_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitGradingInstructions]: """Split the general grading instructions by file Args: - exercise (Exercise): Exercise to split the grading instructions for + exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) + submission (Submission): Submission to split the grading instructions for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitGradingInstructions: Grading instructions split by file, empty if input was too long + Optional[SplitGradingInstructions]: Split grading instructions, None if it is too short or too long """ - if exercise.grading_instructions is None or exercise.grading_instructions.strip() == "": - return SplitGradingInstructions(instructions=[]) + + # Return None if the grading instructions are too short + if (exercise.grading_instructions is None + or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + return None model = config.model.get_model() - solution_repo = exercise.get_solution_repository() template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) - - # logger.info("Exercise: %s", file_extension) - # logger.info("Changed files: %s", changed_files) - # logger.info("Solution repo: %s", solution_repo) - # logger.info("Template repo: %s", template_repo) - # solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + 
).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -60,15 +76,13 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach prompt_input = { "grading_instructions": exercise.grading_instructions, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitGradingInstructions object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitGradingInstructions(instructions=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_grading_instructions = predict_and_parse( model=model, @@ -78,25 +92,12 @@ def split_grading_instructions_by_file(exercise: Exercise, config: BasicApproach ) if debug: - emit_meta(f"{FILE_GRADING_INSTRUCTIONS_KEY}_data", split_grading_instructions.dict()) - - return split_grading_instructions + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_grading_instructions.dict() + }) + if not split_grading_instructions.file_grading_instructions: + return None -def generate_and_store_split_grading_instructions_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitGradingInstructions: - """Generate and store the split grading instructions if needed - - Args: - exercise (Exercise): Exercise to get the split grading instructions for - config (BasicApproachConfig): Configuration - - Returns: - SplitGradingInstructions: Grading instructions split by file - """ - if FILE_GRADING_INSTRUCTIONS_KEY in exercise.meta: - return SplitGradingInstructions.parse_obj(exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY]) - - split_grading_instructions = split_grading_instructions_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_GRADING_INSTRUCTIONS_KEY] = split_grading_instructions.dict() - store_exercise(exercise) return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 8329e5749..1000ad245 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,17 +1,18 @@ -from typing import Sequence +from typing import Optional, Sequence from athena import emit_meta from pydantic import BaseModel, Field -from athena.programming import Exercise -from athena.storage import store_exercise +from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.helpers.llm_utils import get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, predict_and_parse -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension - - -FILE_PROBLEM_STATEMETS_KEY = "file_problem_statements" +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + 
predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff class FileProblemStatement(BaseModel): @@ -21,28 +22,49 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") -def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: +async def split_problem_statement_by_file( + exercise: Exercise, + submission: Submission, + config: BasicApproachConfig, + debug: bool + ) -> Optional[SplitProblemStatement]: """Split the general problem statement by file Args: - exercise (Exercise): Exercise to split the problem statement for + exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) + submission (Submission): Submission to split the problem statement for (respecting the changed files) config (BasicApproachConfig): Configuration Returns: - SplitProblemStatement: Problem statement split by file, empty if input was too long + Optional[SplitProblemStatement]: Split problem statement, None if it is too short or too long """ - if exercise.problem_statement.strip() == "": - return SplitProblemStatement(problem_statements=[]) + # Return None if the problem statement is too short + if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: + return None + model = config.model.get_model() - - solution_repo = exercise.get_solution_repository() + template_repo = exercise.get_template_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) or "" - changed_files = get_diff(src_repo=template_repo, dst_repo=solution_repo, file_path=f"*{file_extension}", name_only=True) + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True + ).split("\n") + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, @@ -50,18 +72,16 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon human_message=config.split_problem_statement_by_file_prompt.system_message, pydantic_object=SplitProblemStatement ) - + prompt_input = { "problem_statement": exercise.problem_statement, - "changed_files": changed_files + "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - # If the input is too long, return an empty SplitProblemStatement object - prompt_length = num_tokens_from_prompt(chat_prompt, prompt_input) - if prompt_length > config.max_input_tokens: - if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_error", f"Input too long: {prompt_length} > {config.max_input_tokens}") - return SplitProblemStatement(problem_statements=[]) + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None split_problem_statement = 
predict_and_parse( model=model, @@ -71,25 +91,12 @@ def split_problem_statement_by_file(exercise: Exercise, config: BasicApproachCon ) if debug: - emit_meta(f"{FILE_PROBLEM_STATEMETS_KEY}_data", split_problem_statement.dict()) + emit_meta("file_problem_statement", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_problem_statement.dict() + }) - return split_problem_statement - - -def generate_and_store_split_problem_statement_if_needed(exercise: Exercise, config: BasicApproachConfig, debug: bool) -> SplitProblemStatement: - """Generate and store the split problem statement if needed - - Args: - exercise (Exercise): Exercise to split the problem statement for - config (BasicApproachConfig): Configuration - - Returns: - SplitProblemStatement: Problem statement split by file - """ - if FILE_PROBLEM_STATEMETS_KEY in exercise.meta: - return SplitProblemStatement.parse_obj(exercise.meta[FILE_PROBLEM_STATEMETS_KEY]) + if not split_problem_statement.file_problem_statements: + return None - split_problem_statement = split_problem_statement_by_file(exercise=exercise, config=config, debug=debug) - exercise.meta[FILE_PROBLEM_STATEMETS_KEY] = split_problem_statement.dict() - store_exercise(exercise) return split_problem_statement From 9b00f2dca0a2db66bae462ce2456b0d6da274339 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 23:46:04 +0200 Subject: [PATCH 23/51] refactor generator --- .../module_programming_llm/config.py | 5 +- .../generate_suggestions_by_file.py | 246 +++++++++++------- .../helpers/llm_utils.py | 21 +- .../module_programming_llm/helpers/utils.py | 2 +- .../prompts/generate_suggestions_by_file.py | 4 +- .../split_grading_instructions_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 7 files changed, 157 insertions(+), 125 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 0d6cc7e95..3507eded0 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -43,9 +43,9 @@ class SplitGradingInstructionsByFilePrompt(BaseModel): class GenerationPrompt(BaseModel): """\ Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, \ -**{submission}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}** +**{submission_file}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{template_to_solution_diff}** -*Note: Prompt will be applied per file independently, submission is a single file.*\ +*Note: Prompt will be applied per file independently. Also, you don't have to include all features, e.g. 
template_to_solution_diff.*\ """ system_message: str = Field(default=generate_suggestions_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -58,6 +58,7 @@ class BasicApproachConfig(BaseModel): max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_number_of_files: int = Field(default=25, description="Maximum number of files.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index bc93269eb..ad8daf47e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,17 +1,25 @@ from typing import List, Optional, Sequence - +import asyncio from pydantic import BaseModel, Field -from langchain.chains.openai_functions import create_structured_output_chain from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from athena.logger import logger from module_programming_llm.config import BasicApproachConfig -from module_programming_llm.split_grading_instructions_by_file import generate_and_store_split_grading_instructions_if_needed -from module_programming_llm.split_problem_statement_by_file import generate_and_store_split_problem_statement_if_needed -from module_programming_llm.helpers.llm_utils import check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions -from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers +from module_programming_llm.split_grading_instructions_by_file import split_grading_instructions_by_file +from module_programming_llm.split_problem_statement_by_file import split_problem_statement_by_file +from module_programming_llm.helpers.llm_utils import ( + check_prompt_length_and_omit_features_if_necessary, + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + predict_and_parse, +) +from module_programming_llm.helpers.utils import( + get_diff, + load_files_from_repo, + add_line_numbers, + get_programming_language_file_extension +) class FeedbackModel(BaseModel): @@ -38,13 +46,26 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() - # Get split grading instructions - split_grading_instructions = generate_and_store_split_grading_instructions_if_needed(exercise=exercise, config=config, debug=debug) - file_grading_instructions = { item.file_name: item.grading_instructions for item in split_grading_instructions.instructions } + # Get split problem statement and grading instructions by file (if necessary) + split_problem_statement, split_grading_instructions = await asyncio.gather( + split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), + 
split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + ) + + is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split + file_problem_statements = { + item.file_name: item.problem_statement + for item in split_problem_statement.file_problem_statements + } if split_problem_statement is not None else {} - # Get split problem statement - split_problem_statement = generate_and_store_split_problem_statement_if_needed(exercise=exercise, config=config, debug=debug) - file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.problem_statements } + is_short_grading_instructions = ( + num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split + if exercise.grading_instructions is not None else True + ) + file_grading_instructions = { + item.file_name: item.grading_instructions + for item in split_grading_instructions.file_grading_instructions + } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] @@ -53,33 +74,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio template_repo = exercise.get_template_repository() submission_repo = submission.get_repository() - file_extension = get_programming_language_file_extension(exercise.programming_language) - if file_extension is None: - raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - - files = load_files_from_repo( + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") + + # Changed text files + changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x.endswith(file_extension) if file_extension else False + file_filter=lambda x: x in changed_files_from_template_to_submission ) - for file_path, content in files.items(): - if content is None: - continue - - problem_statement = file_problem_statements.get(file_path, "No relevant problem statement section found.") - grading_instructions = file_grading_instructions.get(file_path, "No relevant grading instructions found.") - - content = add_line_numbers(content) - solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) - template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) + for file_path, file_content in changed_files.items(): + problem_statement = ( + exercise.problem_statement if is_short_problem_statement + else file_problem_statements.get(file_path, "No relevant problem statement section found.") + ) + problem_statement = problem_statement if problem_statement.strip() else "No problem statement found." + + grading_instructions = ( + exercise.grading_instructions or "" if is_short_grading_instructions + else file_grading_instructions.get(file_path, "No relevant grading instructions found.") + ) + grading_instructions = grading_instructions if grading_instructions.strip() else "No grading instructions found." 
+ + file_content = add_line_numbers(file_content) + solution_to_submission_diff = get_diff( + src_repo=solution_repo, + dst_repo=submission_repo, + src_prefix="solution", + dst_prefix="submission", + file_path=file_path + ) + template_to_submission_diff = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + src_prefix="template", + dst_prefix="submission", + file_path=file_path + ) + template_to_solution_diff = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + src_prefix="template", + dst_prefix="solution", + file_path=file_path + ) prompt_inputs.append({ "file_path": file_path, - "submission": content, + "priority": len(template_to_solution_diff), + "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, "solution_to_submission_diff": solution_to_submission_diff, "template_to_submission_diff": template_to_submission_diff, + "template_to_solution_diff": template_to_solution_diff, "grading_instructions": grading_instructions, "problem_statement": problem_statement, }) @@ -93,11 +145,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio # Filter long prompts (omitting features if necessary) omittable_features = [ + "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", + "solution_to_submission_diff", "template_to_submission_diff", - "solution_to_submission_diff" ] + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it + prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in [check_prompt_length_and_omit_features_if_necessary( @@ -110,70 +166,64 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio if should_run ] - chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) - if not prompt_inputs: - return [] - result = await chain.agenerate(prompt_inputs) - - logger.info("Generated result: %s ", result) - - return [] - # return predict_and_parse( - # model=model, - # chat_prompt=chat_prompt, - # prompt_input={ - # "grading_instructions": exercise.grading_instructions, - # "changed_files": changed_files - # }, - # pydantic_object=SplitGradingInstructions - # ) - - - - - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# 
id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file + # If we have many files we need to filter and prioritize them + if len(prompt_inputs) > config.max_number_of_files: + programming_language_extension = get_programming_language_file_extension(programming_language=exercise.programming_language) + + # Prioritize files that have a diff between solution and submission + prompt_inputs = sorted( + prompt_inputs, + key=lambda x: x["priority"], + reverse=True + ) + + filtered_prompt_inputs = [] + if programming_language_extension is not None: + filtered_prompt_inputs = [ + prompt_input + for prompt_input in prompt_inputs + if prompt_input["file_path"].endswith(programming_language_extension) + ] + + while len(filtered_prompt_inputs) < config.max_number_of_files and prompt_inputs: + filtered_prompt_inputs.append(prompt_inputs.pop(0)) + prompt_inputs = filtered_prompt_inputs + + results: List[AssessmentModel] = await asyncio.gather(*[ + predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=AssessmentModel + ) for prompt_input in prompt_inputs + ]) + + if debug: + emit_meta( + "generate_suggestions", [ + { + "file_path": prompt_input["file_path"], + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() + } + for prompt_input, result in zip(prompt_inputs, results) + ] + ) + + feedbacks: List[Feedback] = [] + for prompt_input, result in zip(prompt_inputs, results): + file_path = prompt_input["file_path"] + for feedback in result.feedbacks: + feedbacks.append(Feedback( + exercise_id=exercise.id, + submission_id=submission.id, + title=feedback.title, + description=feedback.description, + file_path=file_path, + line_start=feedback.line_start, + line_end=feedback.line_end, + credits=feedback.credits, + meta={} + )) + + return feedbacks diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 53a300f00..b59ca9dab 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): """Predicts and parses the output of the model Args: @@ -131,22 +131,3 @@ def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, chain = LLMChain(llm=model, prompt=chat_prompt) output = chain.run(**prompt_input) return output_parser.parse(output) - - -async def agenerate_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): - """Generates and parses the output of the model - - Args: - model (BaseLanguageModel): The model to generate with - chat_prompt (ChatPromptTemplate): Prompt to use - prompt_input (dict): Input parameters to use for the prompt - pydantic_object (Type[T]): Pydantic model to parse the output - """ - if supports_function_calling(model): - 
chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/helpers/utils.py b/module_programming_llm/module_programming_llm/helpers/utils.py index 8f6c67ab8..894f4a6a6 100644 --- a/module_programming_llm/module_programming_llm/helpers/utils.py +++ b/module_programming_llm/module_programming_llm/helpers/utils.py @@ -9,7 +9,7 @@ from langchain.document_loaders import GitLoader -def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, Optional[str]]: +def load_files_from_repo(repo: Repo, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, str]: return { doc.metadata['file_path']: doc.page_content for doc in GitLoader(repo_path=str(repo.working_tree_dir), file_filter=file_filter).load() diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index f1a300ae0..2e6b27059 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -16,8 +16,8 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission to grade (with line numbers : ): -{submission} +Student\'s submission file to grade (with line numbers : ): +{submission_file} Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 8d45b597f..cb1f4a29f 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -84,7 +84,7 @@ async def split_grading_instructions_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_grading_instructions = predict_and_parse( + split_grading_instructions = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1000ad245..28df233bf 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -83,7 +83,7 @@ async def split_problem_statement_by_file( if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: return None - split_problem_statement = predict_and_parse( + split_problem_statement = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, From 33cc686518d018dd768bcdfb89444451bf857354 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 23:46:16 +0200 Subject: [PATCH 24/51] fix spacing --- .../module_programming_llm/generate_suggestions_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index ad8daf47e..22240d708 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -14,7 +14,7 @@ num_tokens_from_string, predict_and_parse, ) -from module_programming_llm.helpers.utils import( +from module_programming_llm.helpers.utils import ( get_diff, load_files_from_repo, add_line_numbers, From 01e736425e535dfc2793dd41f60de04e841d0a73 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 19 Aug 2023 14:02:22 +0200 Subject: [PATCH 25/51] reorder imports --- .../module_text_llm/suggest_feedback_basic.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/module_text_llm/module_text_llm/suggest_feedback_basic.py b/module_text_llm/module_text_llm/suggest_feedback_basic.py index 96d0d258d..95573fc96 100644 --- a/module_text_llm/module_text_llm/suggest_feedback_basic.py +++ b/module_text_llm/module_text_llm/suggest_feedback_basic.py @@ -1,4 +1,5 @@ from typing import List, Optional, Sequence +from pydantic import BaseModel, Field from langchain.chat_models import ChatOpenAI from langchain.chains import LLMChain @@ -9,20 +10,15 @@ ) from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.schema.output_parser import OutputParserException +from langchain.chains.openai_functions import create_structured_output_chain from athena import emit_meta from athena.text import Exercise, Submission, Feedback from athena.logger import logger -from pydantic import BaseModel, Field -from module_text_llm.config import BasicApproachConfig +from module_text_llm.config import BasicApproachConfig from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, num_tokens_from_string - -from langchain.chains.openai_functions import ( - create_structured_output_chain, -) - class FeedbackModel(BaseModel): title: str = Field(..., description="Very short title, i.e. feedback category", example="Logic Error") description: str = Field(..., description="Feedback description") From d129645a8cb1efec48cf31d58d2edde8cf21a3f1 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:03:33 +0200 Subject: [PATCH 26/51] update name --- module_text_llm/module_text_llm/__main__.py | 6 +++--- .../{suggest_feedback_basic.py => generate_suggestions.py} | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename module_text_llm/module_text_llm/{suggest_feedback_basic.py => generate_suggestions.py} (97%) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 457ff59e7..e9bf8d448 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -8,7 +8,7 @@ from athena.logger import logger from module_text_llm.config import Configuration -from .suggest_feedback_basic import suggest_feedback_basic +from module_text_llm.generate_suggestions import generate_suggestions @submissions_consumer @@ -24,13 +24,13 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm @feedback_consumer def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]): - logger.info("process_feedback: Received feedbacks for submission %d of exercise %d.", submission.id, exercise.id) + logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id) @feedback_provider async def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]: logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id, exercise.id) - return await suggest_feedback_basic(exercise, submission, module_config.approach, module_config.debug) + return await generate_suggestions(exercise, submission, module_config.approach, module_config.debug) if __name__ == "__main__": diff --git a/module_text_llm/module_text_llm/suggest_feedback_basic.py b/module_text_llm/module_text_llm/generate_suggestions.py similarity index 97% rename from module_text_llm/module_text_llm/suggest_feedback_basic.py rename to module_text_llm/module_text_llm/generate_suggestions.py index 95573fc96..519de9e01 100644 --- a/module_text_llm/module_text_llm/suggest_feedback_basic.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -68,7 +68,7 @@ def check_token_length_and_omit_from_input_if_necessary(prompt: ChatPromptTempla # pylint: disable-msg=too-many-locals -async def suggest_feedback_basic(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: +async def generate_suggestions(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() prompt_input = { From 7125521779487e484d4fcc2be7bbb4d26a6abaf0 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:06:35 +0200 Subject: [PATCH 27/51] rename and improve docs --- module_text_llm/module_text_llm/config.py | 11 ++++++----- ...gest_feedback_basic.py => generate_suggestions.py} | 0 2 files changed, 6 insertions(+), 5 deletions(-) rename module_text_llm/module_text_llm/prompts/{suggest_feedback_basic.py => generate_suggestions.py} (100%) diff --git a/module_text_llm/module_text_llm/config.py b/module_text_llm/module_text_llm/config.py index 8cfbb9dc8..8598ee8de 100644 --- a/module_text_llm/module_text_llm/config.py +++ b/module_text_llm/module_text_llm/config.py @@ -2,13 +2,14 @@ from athena import config_schema_provider from module_text_llm.helpers.models import ModelConfigType, DefaultModelConfig -from .prompts.suggest_feedback_basic import system_template, human_template +from module_text_llm.prompts.generate_suggestions import system_template, human_template -class BasicPrompt(BaseModel): +class GenerateSuggestionsPrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{submission}**, **{max_points}**, **{bonus_points}** -**{problem_statement}** or **{example_solution}** might be omitted if the input is too long.\ +Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}** + +_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._\ """ system_message: str = Field(default=system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -20,7 +21,7 @@ class BasicApproachConfig(BaseModel): """This approach uses a LLM with a single prompt to generate feedback in a single step.""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - prompt: BasicPrompt = Field(default=BasicPrompt()) + prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt()) @config_schema_provider diff --git a/module_text_llm/module_text_llm/prompts/suggest_feedback_basic.py b/module_text_llm/module_text_llm/prompts/generate_suggestions.py similarity index 100% rename from module_text_llm/module_text_llm/prompts/suggest_feedback_basic.py rename to module_text_llm/module_text_llm/prompts/generate_suggestions.py From 4d784fa2c825444c75f00e8ac2d3310b7d35db6d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:07:17 +0200 Subject: [PATCH 28/51] change naming --- module_text_llm/module_text_llm/config.py | 2 +- module_text_llm/module_text_llm/generate_suggestions.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/module_text_llm/module_text_llm/config.py b/module_text_llm/module_text_llm/config.py index 8598ee8de..17519aea5 100644 --- a/module_text_llm/module_text_llm/config.py +++ b/module_text_llm/module_text_llm/config.py @@ -21,7 +21,7 @@ class BasicApproachConfig(BaseModel): """This approach uses a LLM with a single prompt to generate feedback in a single step.""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt()) + generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt()) @config_schema_provider diff --git a/module_text_llm/module_text_llm/generate_suggestions.py b/module_text_llm/module_text_llm/generate_suggestions.py index 519de9e01..0fc34277d 100644 --- a/module_text_llm/module_text_llm/generate_suggestions.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -87,13 +87,13 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi # Prepare prompt if supports_function_calling: - system_message_prompt = SystemMessagePromptTemplate.from_template(config.prompt.system_message) - human_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.human_message) + system_message_prompt = SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message) + human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message) else: - system_message_prompt = SystemMessagePromptTemplate.from_template(config.prompt.system_message + "\n{format_instructions}") + system_message_prompt = SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message + "\n{format_instructions}") system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.human_message + "\nJSON Response:") + human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message + "\nJSON Response:") chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) prompt_input, should_run = check_token_length_and_omit_from_input_if_necessary(chat_prompt, prompt_input, config.max_input_tokens, debug) From 41602c77b7b880540d76b5c7e63fd4eb17b21fb9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sat, 19 Aug 2023 14:28:23 +0200 Subject: [PATCH 29/51] refactor --- .../module_text_llm/generate_suggestions.py | 107 ++++---------- .../module_text_llm/helpers/llm_utils.py | 134 ++++++++++++++++++ 2 files changed, 165 insertions(+), 76 deletions(-) create mode 100644 module_text_llm/module_text_llm/helpers/llm_utils.py diff --git a/module_text_llm/module_text_llm/generate_suggestions.py b/module_text_llm/module_text_llm/generate_suggestions.py index 0fc34277d..b8110be1e 100644 --- a/module_text_llm/module_text_llm/generate_suggestions.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -1,22 +1,17 @@ from typing import List, Optional, Sequence from pydantic import BaseModel, Field -from langchain.chat_models import ChatOpenAI -from langchain.chains import LLMChain -from langchain.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser -from langchain.schema.output_parser import OutputParserException -from langchain.chains.openai_functions import create_structured_output_chain - from athena import emit_meta from athena.text import Exercise, Submission, Feedback from athena.logger import logger from module_text_llm.config import BasicApproachConfig +from module_text_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + check_prompt_length_and_omit_features_if_necessary, + num_tokens_from_prompt, + predict_and_parse +) from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, num_tokens_from_string class FeedbackModel(BaseModel): @@ -39,35 +34,6 @@ class Config: title = "Assessment" -def check_token_length_and_omit_from_input_if_necessary(prompt: ChatPromptTemplate, prompt_input, max_input_tokens: int, debug: bool): - if num_tokens_from_string(prompt.format(**prompt_input)) <= max_input_tokens: - return prompt_input, True - - omitted_features = [] - - # Input is too long -> Try to omit example_solution - if "example_solution" in prompt_input: - prompt_input["example_solution"] = "omitted" - omitted_features.append("example_solution") - if num_tokens_from_string(prompt.format(**prompt_input)) <= max_input_tokens: - if debug: - emit_meta("omitted_features", omitted_features) - return prompt_input, True - - # Input is still too long -> Try to omit problem_statement - if "problem_statement" in prompt_input: - prompt_input["problem_statement"] = "omitted" - omitted_features.append("problem_statement") - if num_tokens_from_string(prompt.format(**prompt_input)) <= max_input_tokens: - if debug: - emit_meta("omitted_features", omitted_features) - return prompt_input, True - - # Input is still too long -> Model should not run - return prompt_input, False - - -# pylint: disable-msg=too-many-locals async def generate_suggestions(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() @@ -80,46 +46,37 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi "submission": add_sentence_numbers(submission.text) } - supports_function_calling = isinstance(model, ChatOpenAI) - - # Output parser for non-function-calling models - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=AssessmentModel), llm=model) - - # Prepare prompt - if supports_function_calling: - system_message_prompt = 
SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message) - human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message) - else: - system_message_prompt = SystemMessagePromptTemplate.from_template(config.generate_suggestions_prompt.system_message + "\n{format_instructions}") - system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} - system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(config.generate_suggestions_prompt.human_message + "\nJSON Response:") - chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - - prompt_input, should_run = check_token_length_and_omit_from_input_if_necessary(chat_prompt, prompt_input, config.max_input_tokens, debug) + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_prompt.system_message, + human_message=config.generate_suggestions_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Check if the prompt is too long and omit features if necessary (in order of importance) + omittable_features = ["example_solution", "problem_statement", "grading_instructions"] + prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary( + prompt=chat_prompt, + prompt_input= prompt_input, + max_input_tokens=config.max_input_tokens, + omittable_features=omittable_features, + debug=debug + ) + + # Skip if the prompt is too long if not should_run: logger.warning("Input too long. Skipping.") if debug: emit_meta("prompt", chat_prompt.format(**prompt_input)) - emit_meta("error", "Input too long. 
Skipping.") - - # Return early since we cannot run the model + emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}") return [] - if supports_function_calling: - chain = create_structured_output_chain(AssessmentModel, llm=model, prompt=chat_prompt) - result = chain.run(**prompt_input) - else: - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - - try: - result = output_parser.parse(output) - except OutputParserException as e: - logger.warning("Could not parse and fix output: %s", e) - result = AssessmentModel(feedbacks=[]) - if debug: - emit_meta("parsing_error", output) + result = predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=AssessmentModel + ) if debug: emit_meta("prompt", chat_prompt.format(**prompt_input)) @@ -128,8 +85,6 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi for feedback in result.feedbacks: index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text) feedbacks.append(Feedback( - id=None, - grading_instruction_id=None, exercise_id=exercise.id, submission_id=submission.id, title=feedback.title, diff --git a/module_text_llm/module_text_llm/helpers/llm_utils.py b/module_text_llm/module_text_llm/helpers/llm_utils.py new file mode 100644 index 000000000..1cfd646e6 --- /dev/null +++ b/module_text_llm/module_text_llm/helpers/llm_utils.py @@ -0,0 +1,134 @@ +from typing import Type, TypeVar, List +from pydantic import BaseModel + +import tiktoken + +from langchain.chains import LLMChain +from langchain.chat_models import ChatOpenAI +from langchain.base_language import BaseLanguageModel +from langchain.prompts import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) +from langchain.output_parsers import PydanticOutputParser, OutputFixingParser +from langchain.chains.openai_functions import create_structured_output_chain + +from athena import emit_meta + + +T = TypeVar("T", bound=BaseModel) + + +def num_tokens_from_string(string: str) -> int: + """Returns the number of tokens in a text string.""" + encoding = tiktoken.get_encoding("cl100k_base") + num_tokens = len(encoding.encode(string)) + return num_tokens + + +def num_tokens_from_prompt(chat_prompt: ChatPromptTemplate, prompt_input: dict) -> int: + """Returns the number of tokens in a chat prompt.""" + return num_tokens_from_string(chat_prompt.format(**prompt_input)) + + +def check_prompt_length_and_omit_features_if_necessary(prompt: ChatPromptTemplate, + prompt_input: dict, + max_input_tokens: int, + omittable_features: List[str], + debug: bool): + """Check if the input is too long and omit features if necessary. 
+ + Note: Omitted features will be replaced with "omitted" in the prompt + + Args: + prompt (ChatPromptTemplate): Prompt template + prompt_input (dict): Prompt input + max_input_tokens (int): Maximum number of tokens allowed + omittable_features (List[str]): List of features that can be omitted, ordered by priority (least important first) + debug (bool): Debug flag + + Returns: + (dict, bool): Tuple of (prompt_input, should_run) where prompt_input is the input with omitted features and + should_run is True if the model should run, False otherwise + """ + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + return prompt_input, True + + omitted_features = [] + + # Omit features until the input is short enough + for feature in omittable_features: + if feature in prompt_input: + omitted_features.append(feature) + prompt_input[feature] = "omitted" + if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens: + if debug: + emit_meta("omitted_features", omitted_features) + return prompt_input, True + + # If we get here, we couldn't omit enough features + return prompt_input, False + + +def supports_function_calling(model: BaseLanguageModel): + """Returns True if the model supports function calling, False otherwise + + Args: + model (BaseLanguageModel): The model to check + + Returns: + boolean: True if the model supports function calling, False otherwise + """ + return isinstance(model, ChatOpenAI) + + +def get_chat_prompt_with_formatting_instructions( + model: BaseLanguageModel, + system_message: str, + human_message: str, + pydantic_object: Type[T] + ) -> ChatPromptTemplate: + """Returns a ChatPromptTemplate with formatting instructions (if necessary) + + Note: Does nothing if the model supports function calling + + Args: + model (BaseLanguageModel): The model to check if it supports function calling + system_message (str): System message + human_message (str): Human message + pydantic_object (Type[T]): Model to parse the output + + Returns: + ChatPromptTemplate: ChatPromptTemplate with formatting instructions (if necessary) + """ + if supports_function_calling(model): + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message) + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message) + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") + system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} + system_message_prompt.prompt.input_variables.remove("format_instructions") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON Response:") + return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + + +def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): + """Predicts and parses the output of the model + + Args: + model (BaseLanguageModel): The model to predict with + chat_prompt (ChatPromptTemplate): Prompt to use + prompt_input (dict): Input parameters to use for the prompt + pydantic_object (Type[T]): Pydantic model to parse the output + """ + if supports_function_calling(model): + chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) + return chain.run(**prompt_input) + + 
output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) + chain = LLMChain(llm=model, prompt=chat_prompt) + output = chain.run(**prompt_input) + return output_parser.parse(output) From 51f11a1d76d8e9c123450e48b03cd7aa648c1352 Mon Sep 17 00:00:00 2001 From: Paul Schwind Date: Sat, 19 Aug 2023 17:11:52 +0200 Subject: [PATCH 30/51] Fix: Allow ID autoincrement in SQLite (#82) --- .../models/big_integer_with_autoincrement.py | 16 ++++++++++++++++ athena/athena/models/db_exercise.py | 5 +++-- athena/athena/models/db_feedback.py | 3 ++- athena/athena/models/db_programming_feedback.py | 7 ++++--- .../athena/models/db_programming_submission.py | 5 +++-- athena/athena/models/db_submission.py | 6 ++++-- athena/athena/models/db_text_feedback.py | 7 ++++--- athena/athena/models/db_text_submission.py | 5 +++-- 8 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 athena/athena/models/big_integer_with_autoincrement.py diff --git a/athena/athena/models/big_integer_with_autoincrement.py b/athena/athena/models/big_integer_with_autoincrement.py new file mode 100644 index 000000000..e9b9ce168 --- /dev/null +++ b/athena/athena/models/big_integer_with_autoincrement.py @@ -0,0 +1,16 @@ +""" +SQLAlchemy + SQLite does not support the autoincrement feature for BigInteger columns. +This file provides a class as a workaround for this problem: +It uses a normal Integer column in SQLite and a BigInteger column otherwise. +SQLite Integer columns can autoincrement, but they are limited to 2^63-1. +See https://stackoverflow.com/a/23175518/4306257 for more information. +""" + +from sqlalchemy import BigInteger +from sqlalchemy.dialects import postgresql, mysql, sqlite + +# Solution from https://stackoverflow.com/a/23175518/4306257 +BigIntegerWithAutoincrement = BigInteger() +BigIntegerWithAutoincrement = BigIntegerWithAutoincrement.with_variant(postgresql.BIGINT(), 'postgresql') +BigIntegerWithAutoincrement = BigIntegerWithAutoincrement.with_variant(mysql.BIGINT(), 'mysql') +BigIntegerWithAutoincrement = BigIntegerWithAutoincrement.with_variant(sqlite.INTEGER(), 'sqlite') \ No newline at end of file diff --git a/athena/athena/models/db_exercise.py b/athena/athena/models/db_exercise.py index 888daaf6d..9a2ea929a 100644 --- a/athena/athena/models/db_exercise.py +++ b/athena/athena/models/db_exercise.py @@ -1,11 +1,12 @@ -from sqlalchemy import Column, BigInteger, String, Float, JSON, Enum as SqlEnum +from sqlalchemy import Column, String, Float, JSON, Enum as SqlEnum from athena.schemas import ExerciseType from .model import Model +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBExercise(Model): - id = Column(BigInteger, primary_key=True, index=True, nullable=False) + id = Column(BigIntegerWithAutoincrement, primary_key=True, index=True, nullable=False) title = Column(String, index=True, nullable=False) type = Column(SqlEnum(ExerciseType), index=True, nullable=False) max_points = Column(Float, index=True, nullable=False) diff --git a/athena/athena/models/db_feedback.py b/athena/athena/models/db_feedback.py index 94bb9a33d..95a07836f 100644 --- a/athena/athena/models/db_feedback.py +++ b/athena/athena/models/db_feedback.py @@ -1,12 +1,13 @@ from sqlalchemy import Column, BigInteger, Boolean, String, Float, JSON, UniqueConstraint from .model import Model +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBFeedback(Model): __table_args__ = (UniqueConstraint('lms_id'),) - id = 
Column(BigInteger, primary_key=True, index=True, autoincrement=True) + id = Column(BigIntegerWithAutoincrement, primary_key=True, index=True, autoincrement=True) lms_id = Column(BigInteger) title = Column(String) description = Column(String) diff --git a/athena/athena/models/db_programming_feedback.py b/athena/athena/models/db_programming_feedback.py index b096f9ece..94e855f2d 100644 --- a/athena/athena/models/db_programming_feedback.py +++ b/athena/athena/models/db_programming_feedback.py @@ -1,11 +1,12 @@ from typing import cast, Optional from athena.schemas.programming_submission import ProgrammingSubmission -from sqlalchemy import Column, Integer, BigInteger, String, ForeignKey +from sqlalchemy import Column, Integer, String, ForeignKey from sqlalchemy.orm import relationship from athena.database import Base, get_db from .db_programming_submission import DBProgrammingSubmission from .db_feedback import DBFeedback +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBProgrammingFeedback(DBFeedback, Base): @@ -15,8 +16,8 @@ class DBProgrammingFeedback(DBFeedback, Base): line_start: Optional[int] = Column(Integer) # type: ignore line_end: Optional[int] = Column(Integer) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) - submission_id = Column(BigInteger, ForeignKey("programming_submissions.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) + submission_id = Column(BigIntegerWithAutoincrement, ForeignKey("programming_submissions.id", ondelete="CASCADE"), index=True) exercise = relationship("DBProgrammingExercise", back_populates="feedbacks") submission = relationship("DBProgrammingSubmission", back_populates="feedbacks") diff --git a/athena/athena/models/db_programming_submission.py b/athena/athena/models/db_programming_submission.py index 607ef8e0c..9dedc1809 100644 --- a/athena/athena/models/db_programming_submission.py +++ b/athena/athena/models/db_programming_submission.py @@ -1,15 +1,16 @@ -from sqlalchemy import ForeignKey, BigInteger, Column, String +from sqlalchemy import ForeignKey, Column, String from sqlalchemy.orm import relationship from athena.database import Base from .db_submission import DBSubmission +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBProgrammingSubmission(DBSubmission, Base): __tablename__ = "programming_submissions" repository_url: str = Column(String, nullable=False) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("programming_exercises.id", ondelete="CASCADE"), index=True) exercise = relationship("DBProgrammingExercise", back_populates="submissions") feedbacks = relationship("DBProgrammingFeedback", back_populates="submission") diff --git a/athena/athena/models/db_submission.py b/athena/athena/models/db_submission.py index b9d55dd2c..c2e490345 100644 --- a/athena/athena/models/db_submission.py +++ b/athena/athena/models/db_submission.py @@ -1,7 +1,9 @@ -from sqlalchemy import Column, BigInteger, JSON +from sqlalchemy import Column, JSON + from .model import Model +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBSubmission(Model): - id = Column(BigInteger, primary_key=True, index=True, nullable=False) + id = Column(BigIntegerWithAutoincrement, 
primary_key=True, index=True, autoincrement=True,) meta = Column(JSON, nullable=False) diff --git a/athena/athena/models/db_text_feedback.py b/athena/athena/models/db_text_feedback.py index ca31ec4ed..309f4fd28 100644 --- a/athena/athena/models/db_text_feedback.py +++ b/athena/athena/models/db_text_feedback.py @@ -1,10 +1,11 @@ from typing import Optional -from sqlalchemy import Column, Integer, BigInteger, ForeignKey +from sqlalchemy import Column, Integer, ForeignKey from sqlalchemy.orm import relationship from athena.database import Base from .db_feedback import DBFeedback +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBTextFeedback(DBFeedback, Base): @@ -13,8 +14,8 @@ class DBTextFeedback(DBFeedback, Base): index_start: Optional[int] = Column(Integer) # type: ignore index_end: Optional[int] = Column(Integer) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) - submission_id = Column(BigInteger, ForeignKey("text_submissions.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) + submission_id = Column(BigIntegerWithAutoincrement, ForeignKey("text_submissions.id", ondelete="CASCADE"), index=True) exercise = relationship("DBTextExercise", back_populates="feedbacks") submission = relationship("DBTextSubmission", back_populates="feedbacks") diff --git a/athena/athena/models/db_text_submission.py b/athena/athena/models/db_text_submission.py index b4bf90fea..742f6bbfe 100644 --- a/athena/athena/models/db_text_submission.py +++ b/athena/athena/models/db_text_submission.py @@ -1,8 +1,9 @@ -from sqlalchemy import ForeignKey, BigInteger, Column, String +from sqlalchemy import ForeignKey, Column, String from sqlalchemy.orm import relationship from athena.database import Base from .db_submission import DBSubmission +from .big_integer_with_autoincrement import BigIntegerWithAutoincrement class DBTextSubmission(DBSubmission, Base): @@ -10,7 +11,7 @@ class DBTextSubmission(DBSubmission, Base): text: str = Column(String, nullable=False) # type: ignore language: str = Column(String, nullable=True) # type: ignore - exercise_id = Column(BigInteger, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) + exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("text_exercises.id", ondelete="CASCADE"), index=True) exercise = relationship("DBTextExercise", back_populates="submissions") feedbacks = relationship("DBTextFeedback", back_populates="submission") From 174aa81bed9ea9d92d18a58939d6f4df5fba650f Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 00:06:29 +0200 Subject: [PATCH 31/51] fix stuff --- .../module_programming_llm/generate_suggestions_by_file.py | 7 ++++++- .../prompts/generate_suggestions_by_file.py | 3 --- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 22240d708..93dc67564 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -1,4 +1,5 @@ from typing import List, Optional, Sequence +import os import asyncio from pydantic import BaseModel, Field @@ -80,11 +81,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio file_path=None, name_only=True ).split("\n") + changed_files_from_template_to_submission = [ + os.path.join(str(submission_repo.working_tree_dir or ""), file_path) + for file_path in changed_files_from_template_to_submission + ] # Changed text files changed_files = load_files_from_repo( submission_repo, - file_filter=lambda x: x in changed_files_from_template_to_submission + file_filter=lambda file_path: file_path in changed_files_from_template_to_submission ) for file_path, file_content in changed_files.items(): diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 2e6b27059..344d635d0 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -9,9 +9,6 @@ Problem statement: {problem_statement} -Example solution: -{example_solution} - Grading instructions: {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} From d6b1e8033e9114968d7f8f025f0fdd7afec2e5b0 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 09:41:39 +0200 Subject: [PATCH 32/51] add fixes --- module_programming_llm/module_programming_llm/config.py | 7 +++---- .../prompts/split_problem_statement_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3507eded0..3d9225a1f 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -22,23 +22,22 @@ class SplitProblemStatementsByFilePrompt(BaseModel): *Note: `changed_files` are the changed files between template and solution repository.*\ """ - tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_problem_statements_by_file_human_template, description="Message from a human. 
The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the problem statement into file-based ones after this number of tokens.") + class SplitGradingInstructionsByFilePrompt(BaseModel): """\ Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** """ - tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") human_message: str = Field(default=split_grading_instructions_by_file_human_template, description="Message from a human. The input on which the AI is supposed to act.") - + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") class GenerationPrompt(BaseModel): """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index c06aea30e..b92710c07 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Problem statement by file: diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 28df233bf..1cb35f7c8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -69,7 +69,7 @@ async def split_problem_statement_by_file( chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, system_message=config.split_problem_statement_by_file_prompt.system_message, - human_message=config.split_problem_statement_by_file_prompt.system_message, + human_message=config.split_problem_statement_by_file_prompt.human_message, pydantic_object=SplitProblemStatement ) From 3a17659c8806590f35d6f7be9f9c039851aba0e3 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:51:13 +0200 Subject: [PATCH 33/51] more fixes --- .../generate_suggestions_by_file.py | 19 ++++++++++--------- .../split_grading_instructions_by_file.py | 4 ++-- .../split_problem_statement_by_file.py | 2 +- .../split_grading_instructions_by_file.py | 12 ++++++++++-- .../split_problem_statement_by_file.py | 8 ++++++++ 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 93dc67564..732ab557b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -47,10 +47,18 @@ class Config: async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: model = config.model.get_model() + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.generate_suggestions_by_file_prompt.system_message, + human_message=config.generate_suggestions_by_file_prompt.human_message, + pydantic_object=AssessmentModel + ) + + # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( - split_problem_statement_by_file(exercise=exercise, submission=submission, config=config, debug=debug), - split_grading_instructions_by_file(exercise=exercise, submission=submission, config=config, debug=debug) + split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), + split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) ) is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split @@ -141,13 +149,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio "problem_statement": problem_statement, }) - chat_prompt = get_chat_prompt_with_formatting_instructions( - model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, - pydantic_object=AssessmentModel - ) - # Filter long prompts (omitting features if necessary) omittable_features = [ "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 2f181609f..54f2872f6 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the grading instructions by changed file to make it simpler. +Restructure the grading instructions by student changed file to make it simpler. 
""" human_template = """\ @@ -11,7 +11,7 @@ Changed files from template to sample solution: {changed_files_from_template_to_solution} -Changed files from template to student submission: +Changed files from template to student submission (Pick from this list, very important!): {changed_files_from_template_to_submission} Grading instructions by file: diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index b92710c07..397e34893 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,7 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -Restructure the problem statement by changed file. +Restructure the problem statement by student changed file to make it simpler. """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index cb1f4a29f..97903d809 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitGradingInstructions(BaseModel): async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitGradingInstructions]: @@ -36,6 +39,7 @@ async def split_grading_instructions_by_file( Args: exercise (Exercise): Exercise to split the grading instructions for (respecting the changed files) submission (Submission): Submission to split the grading instructions for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for grading_instructions config (BasicApproachConfig): Configuration Returns: @@ -44,7 +48,11 @@ async def split_grading_instructions_by_file( # Return None if the grading instructions are too short if (exercise.grading_instructions is None - or num_tokens_from_string(exercise.grading_instructions) <= config.split_problem_statement_by_file_prompt.tokens_before_split): + or num_tokens_from_string(exercise.grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split): + return None + + # Return None if the grading instructions are not in the prompt + if "grading_instructions" not in prompt.input_variables: return None model = config.model.get_model() @@ -92,7 +100,7 @@ async def split_grading_instructions_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), "result": split_grading_instructions.dict() }) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 1cb35f7c8..4790f8ce8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ 
b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -28,6 +30,7 @@ class SplitProblemStatement(BaseModel): async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, + prompt: ChatPromptTemplate, config: BasicApproachConfig, debug: bool ) -> Optional[SplitProblemStatement]: @@ -36,6 +39,7 @@ async def split_problem_statement_by_file( Args: exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) submission (Submission): Submission to split the problem statement for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for problem_statement config (BasicApproachConfig): Configuration Returns: @@ -45,6 +49,10 @@ async def split_problem_statement_by_file( # Return None if the problem statement is too short if num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split: return None + + # Return None if the problem statement is not in the prompt + if "problem_statement" not in prompt.input_variables: + return None model = config.model.get_model() From f7d080949c9daa30a036d83b828f58e87fd96354 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 09:51:29 +0200 Subject: [PATCH 34/51] remove empty line --- .../module_programming_llm/generate_suggestions_by_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 732ab557b..c3cd1201e 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -54,7 +54,6 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio pydantic_object=AssessmentModel ) - # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( split_problem_statement_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug), split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) From 3f4e07dd857242cbea14a23bba11ea0103c39597 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 09:53:30 +0200 Subject: [PATCH 35/51] delete unuse --- .../basic/basic_feedback_provider.py | 110 ------------------ 1 file changed, 110 deletions(-) delete mode 100644 module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py diff --git a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py b/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py deleted file mode 100644 index 134fd0e41..000000000 --- a/module_programming_llm/module_programming_llm/basic/basic_feedback_provider.py +++ /dev/null @@ -1,110 +0,0 @@ -# import json -# from typing import List - -# from langchain.chains import LLMChain -# from langchain.prompts import ( -# ChatPromptTemplate, -# SystemMessagePromptTemplate, -# HumanMessagePromptTemplate, -# ) - -# from athena.programming import Exercise, Submission, Feedback -# from athena.logger import logger - -# from module_programming_llm.helpers.utils import get_diff, get_programming_language_file_extension, load_files_from_repo, add_line_numbers -# from module_programming_llm.helpers.models import chat - -# from ..prompts.basic_feedback_provider import system_template, human_template - -# async def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]: -# max_prompt_length = 2560 -# input_list: List[dict] = [] - -# if exercise.meta['file_grading_instructions'] is None: -# raise ValueError("No file grading instructions found for exercise in meta.") -# if exercise.meta['file_problem_statements'] is None: -# raise ValueError("No file problem statements found for exercise in meta.") - -# # Feature extraction -# solution_repo = exercise.get_solution_repository() -# template_repo = exercise.get_template_repository() -# submission_repo = submission.get_repository() - -# file_extension = get_programming_language_file_extension(exercise.programming_language) -# if file_extension is None: -# raise ValueError(f"Could not determine file extension for programming language {exercise.programming_language}.") - -# for file_path, submission_content in load_files_from_repo(submission_repo, file_filter=lambda x: x.endswith(file_extension) if file_extension else False).items(): -# if submission_content is None: -# continue - -# problem_statement = exercise.meta['file_problem_statements'].get(file_path) -# if problem_statement is None: -# logger.info("No problem statement for %s, skipping.", file_path) -# continue - -# grading_instructions = exercise.meta['file_grading_instructions'].get(file_path) -# if grading_instructions is None: -# logger.info("No grading instructions for %s, skipping.", file_path) -# continue - -# submission_content = add_line_numbers(submission_content) -# solution_to_submission_diff = get_diff(src_repo=solution_repo, dst_repo=submission_repo, src_prefix="solution", dst_prefix="submission", file_path=file_path) -# template_to_submission_diff = get_diff(src_repo=template_repo, dst_repo=submission_repo, src_prefix="template", dst_prefix="submission", file_path=file_path) - -# input_list.append({ -# "file_path": file_path, -# "submission_content": submission_content, -# "solution_to_submission_diff": solution_to_submission_diff, -# "template_to_submission_diff": template_to_submission_diff, -# "grading_instructions": grading_instructions, -# "problem_statement": problem_statement, -# }) - -# system_message_prompt = SystemMessagePromptTemplate.from_template(system_template) -# human_message_prompt = 
HumanMessagePromptTemplate.from_template(human_template) -# chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) - -# # Filter long prompts -# input_list = [input for input in input_list if chat.get_num_tokens_from_messages(chat_prompt.format_messages(**input)) <= max_prompt_length] - -# # Completion -# chain = LLMChain(llm=chat, prompt=chat_prompt) -# if not input_list: -# return [] -# result = await chain.agenerate(input_list) - -# # Parse result -# feedback_proposals: List[Feedback] = [] -# for input, generations in zip(input_list, result.generations): -# file_path = input["file_path"] -# for generation in generations: -# try: -# feedbacks = json.loads(generation.text) -# except json.JSONDecodeError: -# logger.error("Failed to parse feedback json: %s", generation.text) -# continue -# if not isinstance(feedbacks, list): -# logger.error("Feedback json is not a list: %s", generation.text) -# continue - -# for feedback in feedbacks: -# line = feedback.get("line", None) -# description = feedback.get("text", None) -# credits = feedback.get("credits", 0.0) -# feedback_proposals.append( -# Feedback( -# id=None, -# exercise_id=exercise.id, -# submission_id=submission.id, -# title="Feedback", -# description=description, -# file_path=file_path, -# line_start=line, -# line_end=None, -# credits=credits, -# meta={}, -# ) -# ) - -# return feedback_proposals \ No newline at end of file From 1025769a55574d251f256f10400b780567cbcebf Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:09:08 +0200 Subject: [PATCH 36/51] add small changes --- .../module_programming_llm/__main__.py | 1 + .../module_programming_llm/config.py | 13 +++++++------ .../generate_suggestions_by_file.py | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index 786d9d824..556313bf6 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -33,5 +33,6 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, module_co if __name__ == "__main__": + # Preload for token estimation later tiktoken.get_encoding("cl100k_base") app.start() diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 3d9225a1f..743ddf693 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -18,9 +18,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): """\ -Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** - -*Note: `changed_files` are the changed files between template and solution repository.*\ +Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_problem_statements_by_file_system_template, description="Message for priming AI behavior and instructing it what to do.") @@ -31,7 +29,7 @@ class SplitProblemStatementsByFilePrompt(BaseModel): class SplitGradingInstructionsByFilePrompt(BaseModel): """\ -Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}** +Features available: 
**{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ """ system_message: str = Field(default=split_grading_instructions_by_file_template, description="Message for priming AI behavior and instructing it what to do.") @@ -53,11 +51,14 @@ class GenerationPrompt(BaseModel): class BasicApproachConfig(BaseModel): - """This approach uses a LLM with a single prompt to generate feedback in a single step.""" + """\ +This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ +Then, it generates suggestions for each file independently.\ +""" max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - max_number_of_files: int = Field(default=25, description="Maximum number of files.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") split_problem_statement_by_file_prompt: SplitProblemStatementsByFilePrompt = Field(default=SplitProblemStatementsByFilePrompt()) split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = Field(default=SplitGradingInstructionsByFilePrompt()) generate_suggestions_by_file_prompt: GenerationPrompt = Field(default=GenerationPrompt()) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index c3cd1201e..179881f1b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -136,8 +136,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) prompt_inputs.append({ - "file_path": file_path, - "priority": len(template_to_solution_diff), + "file_path": file_path, # Not really relevant for the prompt + "priority": len(template_to_solution_diff), # Not really relevant for the prompt "submission_file": file_content, "max_points": exercise.max_points, "bonus_points": exercise.bonus_points, @@ -149,15 +149,15 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio }) # Filter long prompts (omitting features if necessary) + # "submission_file" is not omittable, because it is the main input containing the line numbers + # In the future we might be able to include the line numbers in the diff, but for now we need to keep it omittable_features = [ - "template_to_solution_diff", # If it is even set (has the lowest priority since it is indirectly included in other diffs) + "template_to_solution_diff", # If it is even included in the prompt (has the lowest priority since it is indirectly included in other diffs) "problem_statement", "grading_instructions", "solution_to_submission_diff", - "template_to_submission_diff", + "template_to_submission_diff", # In the future we might indicate the changed lines in the submission_file additionally ] - # "submission_file" is not omittable, because it is the main input containing the line numbers - # In the future we might be able to include the line numbers in the diff, but for now we need to keep it prompt_inputs = [ omitted_prompt_input for omitted_prompt_input, should_run in From 7215b73e5372bb6a74adc1dddae512207406581a Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:10:47 +0200 Subject: [PATCH 37/51] typo --- .../module_programming_llm/split_problem_statement_by_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 4790f8ce8..f2c1f0f1f 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -99,7 +99,7 @@ async def split_problem_statement_by_file( ) if debug: - emit_meta("file_problem_statement", { + emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), "result": split_problem_statement.dict() }) From 16a5ec19df9cb4d2516aff5286deff853667717a Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 10:30:33 +0200 Subject: [PATCH 38/51] add more fixes --- .../split_grading_instructions_by_file.py | 5 ++--- .../split_problem_statement_by_file.py | 5 ++--- .../split_grading_instructions_by_file.py | 20 +++++++++++++++++-- .../split_problem_statement_by_file.py | 20 +++++++++++++++++-- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py index 54f2872f6..21b754846 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the grading instructions by student changed file to make it simpler. +Your task is to restructure the grading instructions by student changed file to show a tutor \ +relevant instructions for each file. This should make it easier for the tutor to grade the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py index 397e34893..95dde8787 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/split_problem_statement_by_file.py @@ -1,7 +1,6 @@ system_template = """\ -You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. - -Restructure the problem statement by student changed file to make it simpler. +Your task is to restructure the problem statement by student changed file to show the student \ +relevant information for each file. 
This should make it easier for the student to solve the assignment.\ """ human_template = """\ diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 97903d809..a565b7e0c 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -108,4 +108,20 @@ async def split_grading_instructions_by_file( if not split_grading_instructions.file_grading_instructions: return None + # Join duplicate file names (some responses contain multiple grading instructions for the same file) + file_grading_instructions_by_file_name = defaultdict(list) + for file_grading_instruction in split_grading_instructions.file_grading_instructions: + file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) + + split_grading_instructions.file_grading_instructions = [ + FileGradingInstruction( + file_name=file_name, + grading_instructions="\n".join( + file_grading_instruction.grading_instructions + for file_grading_instruction in file_grading_instructions + ) + ) + for file_name, file_grading_instructions in file_grading_instructions_by_file_name.items() + ] + return split_grading_instructions diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index f2c1f0f1f..ccfc3533a 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from athena import emit_meta +from collections import defaultdict from pydantic import BaseModel, Field - from langchain.prompts import ChatPromptTemplate +from athena import emit_meta from athena.programming import Exercise, Submission from module_programming_llm.config import BasicApproachConfig @@ -107,4 +107,20 @@ async def split_problem_statement_by_file( if not split_problem_statement.file_problem_statements: return None + # Join duplicate file names (some responses contain multiple problem statements for the same file) + file_problem_statements_by_file_name = defaultdict(list) + for file_problem_statement in split_problem_statement.file_problem_statements: + file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) + + split_problem_statement.file_problem_statements = [ + FileProblemStatement( + file_name=file_name, + problem_statement="\n".join( + file_problem_statement.problem_statement + for file_problem_statement in file_problem_statements + ) + ) + for file_name, file_problem_statements in file_problem_statements_by_file_name.items() + ] + return split_problem_statement From 9680c68715b7d2705b77ce45f79d3830a403e1ce Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 20 Aug 2023 10:36:18 +0200 Subject: [PATCH 39/51] fix pydantic --- .../module_programming_llm/split_grading_instructions_by_file.py | 1 + .../module_programming_llm/split_problem_statement_by_file.py | 1 + 2 files changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index a565b7e0c..0b1a2a615 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -27,6 +27,7 @@ class SplitGradingInstructions(BaseModel): file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") +# pylint: disable=too-many-locals async def split_grading_instructions_by_file( exercise: Exercise, submission: Submission, diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index ccfc3533a..cc72cd3f6 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -27,6 +27,7 @@ class SplitProblemStatement(BaseModel): file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") +# pylint: disable=too-many-locals async def split_problem_statement_by_file( exercise: Exercise, submission: Submission, From 6b16d0d0830f51f986347388e7986f48615598b9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 11:56:38 +0200 Subject: [PATCH 40/51] small improvements --- .../generate_suggestions_by_file.py | 4 ++-- .../split_grading_instructions_by_file.py | 8 ++++---- .../split_problem_statement_by_file.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 179881f1b..e4d533b21 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -63,7 +63,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split file_problem_statements = { item.file_name: item.problem_statement - for item in split_problem_statement.file_problem_statements + for item in split_problem_statement.items } if split_problem_statement is not None else {} is_short_grading_instructions = ( @@ -72,7 +72,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ) file_grading_instructions = { item.file_name: item.grading_instructions - for item in split_grading_instructions.file_grading_instructions + for item in split_grading_instructions.items } if split_grading_instructions is not None else {} prompt_inputs: List[dict] = [] diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 0b1a2a615..388d7a865 100644 --- 
a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -24,7 +24,7 @@ class FileGradingInstruction(BaseModel): class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - file_grading_instructions: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + items: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") # pylint: disable=too-many-locals @@ -106,15 +106,15 @@ async def split_grading_instructions_by_file( "result": split_grading_instructions.dict() }) - if not split_grading_instructions.file_grading_instructions: + if not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) file_grading_instructions_by_file_name = defaultdict(list) - for file_grading_instruction in split_grading_instructions.file_grading_instructions: + for file_grading_instruction in split_grading_instructions.items: file_grading_instructions_by_file_name[file_grading_instruction.file_name].append(file_grading_instruction) - split_grading_instructions.file_grading_instructions = [ + split_grading_instructions.items = [ FileGradingInstruction( file_name=file_name, grading_instructions="\n".join( diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index cc72cd3f6..d423a8a48 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -24,7 +24,7 @@ class FileProblemStatement(BaseModel): class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - file_problem_statements: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + items: Sequence[FileProblemStatement] = Field(..., description="File problem statements") # pylint: disable=too-many-locals @@ -105,15 +105,15 @@ async def split_problem_statement_by_file( "result": split_problem_statement.dict() }) - if not split_problem_statement.file_problem_statements: + if not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) file_problem_statements_by_file_name = defaultdict(list) - for file_problem_statement in split_problem_statement.file_problem_statements: + for file_problem_statement in split_problem_statement.items: file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) - split_problem_statement.file_problem_statements = [ + split_problem_statement.items = [ FileProblemStatement( file_name=file_name, problem_statement="\n".join( From ac40ebef846495d90e9e374d8f3c60d4cbcc6d07 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
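After the rename to items, the caller in generate_suggestions_by_file consumes the field as a file-name to text mapping; a small self-contained sketch of that lookup with simplified stand-in models and an illustrative file name:

from typing import Sequence
from pydantic import BaseModel

class FileProblemStatement(BaseModel):
    file_name: str
    problem_statement: str

class SplitProblemStatement(BaseModel):
    items: Sequence[FileProblemStatement]

split_problem_statement = SplitProblemStatement(
    items=[FileProblemStatement(file_name="src/Main.java", problem_statement="Implement the main loop.")]
)
# Mapping used later to pick the relevant problem statement section per changed file.
file_problem_statements = {
    item.file_name: item.problem_statement
    for item in split_problem_statement.items
}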
Dietrich" Date: Sun, 20 Aug 2023 12:45:42 +0200 Subject: [PATCH 41/51] add final fixes --- .../generate_suggestions_by_file.py | 4 +-- .../helpers/llm_utils.py | 33 ++++++++++++------- .../prompts/generate_suggestions_by_file.py | 16 ++++----- .../split_grading_instructions_by_file.py | 4 +-- .../split_problem_statement_by_file.py | 4 +-- 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index e4d533b21..152fb6660 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -194,7 +194,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio filtered_prompt_inputs.append(prompt_inputs.pop(0)) prompt_inputs = filtered_prompt_inputs - results: List[AssessmentModel] = await asyncio.gather(*[ + results: List[Optional[AssessmentModel]] = await asyncio.gather(*[ predict_and_parse( model=model, chat_prompt=chat_prompt, @@ -209,7 +209,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio { "file_path": prompt_input["file_path"], "prompt": chat_prompt.format(**prompt_input), - "result": result.dict() + "result": result.dict() if result is not None else None } for prompt_input, result in zip(prompt_inputs, results) ] diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index b59ca9dab..394bdd2f7 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -1,6 +1,5 @@ -from typing import Type, TypeVar, List -from pydantic import BaseModel - +from typing import Optional, Type, TypeVar, List +from pydantic import BaseModel, ValidationError import tiktoken from langchain.chains import LLMChain @@ -11,8 +10,9 @@ SystemMessagePromptTemplate, HumanMessagePromptTemplate, ) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.chains.openai_functions import create_structured_output_chain +from langchain.output_parsers import PydanticOutputParser +from langchain.schema import OutputParserException from athena import emit_meta @@ -114,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: """Predicts and parses the output of the model Args: @@ -122,12 +122,23 @@ async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTem chat_prompt (ChatPromptTemplate): Prompt to use prompt_input (dict): Input parameters to use for the prompt pydantic_object (Type[T]): Pydantic model to parse the output + + Returns: + Optional[T]: Parsed output, or None if it could not be parsed """ if supports_function_calling(model): chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - 
chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) + + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser) + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None diff --git a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py index 344d635d0..7535fd244 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/prompts/generate_suggestions_by_file.py @@ -1,11 +1,6 @@ system_template = """\ You are an AI tutor at a prestigious university tasked with grading and providing feedback to programming assignments. -VERY IMPORTANT: Effective feedback for text assignments should be: -1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual\ -""" - -human_template = """\ Problem statement: {problem_statement} @@ -13,12 +8,17 @@ {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} -Student\'s submission file to grade (with line numbers : ): -{submission_file} - Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} +VERY IMPORTANT: Effective feedback for text assignments should be: +1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. 
Contextual\ +""" + +human_template = """\ Diff between template (deletions) and student\'s submission (additions): {template_to_submission_diff} + +Student\'s submission file to grade (with line numbers : ): +{submission_file} """ \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 388d7a865..4ba10cb54 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -103,10 +103,10 @@ async def split_grading_instructions_by_file( if debug: emit_meta("file_grading_instructions", { "prompt": chat_prompt.format(**prompt_input), - "result": split_grading_instructions.dict() + "result": split_grading_instructions.dict() if split_grading_instructions is not None else None }) - if not split_grading_instructions.items: + if split_grading_instructions is None or not split_grading_instructions.items: return None # Join duplicate file names (some responses contain multiple grading instructions for the same file) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index d423a8a48..4a4761610 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -102,10 +102,10 @@ async def split_problem_statement_by_file( if debug: emit_meta("file_problem_statements", { "prompt": chat_prompt.format(**prompt_input), - "result": split_problem_statement.dict() + "result": split_problem_statement.dict() if split_problem_statement is not None else None }) - if not split_problem_statement.items: + if split_problem_statement is None or not split_problem_statement.items: return None # Join duplicate file names (some responses contain multiple problem statements for the same file) From 24d068c6ee7edf99849b142191c50f6e3ba9d687 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
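Since predict_and_parse now returns an Optional result, the per-file assessments gathered concurrently may contain None entries, which are skipped instead of failing the whole run. A self-contained sketch of that consumption pattern, with a stand-in predict function and illustrative file names:

import asyncio
from typing import List, Optional

async def predict_or_none(prompt_input: dict) -> Optional[dict]:
    # Stand-in for predict_and_parse: None means the LLM output could not be
    # parsed into the expected Pydantic model (OutputParserException/ValidationError).
    if prompt_input.get("parsable", True):
        return {"file_path": prompt_input["file_path"], "feedbacks": []}
    return None

async def main() -> None:
    prompt_inputs = [
        {"file_path": "src/Main.java"},
        {"file_path": "src/Util.java", "parsable": False},
    ]
    results: List[Optional[dict]] = await asyncio.gather(
        *[predict_or_none(prompt_input) for prompt_input in prompt_inputs]
    )
    for prompt_input, result in zip(prompt_inputs, results):
        if result is None:
            continue  # skip files whose assessment could not be parsed
        print(prompt_input["file_path"], result["feedbacks"])

asyncio.run(main())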
Dietrich" Date: Sun, 20 Aug 2023 12:48:51 +0200 Subject: [PATCH 42/51] fix parsing --- .../module_text_llm/generate_suggestions.py | 10 ++++-- .../module_text_llm/helpers/llm_utils.py | 34 ++++++++++++------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/module_text_llm/module_text_llm/generate_suggestions.py b/module_text_llm/module_text_llm/generate_suggestions.py index b8110be1e..2acb2c803 100644 --- a/module_text_llm/module_text_llm/generate_suggestions.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -71,7 +71,7 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}") return [] - result = predict_and_parse( + result = await predict_and_parse( model=model, chat_prompt=chat_prompt, prompt_input=prompt_input, @@ -79,7 +79,13 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi ) if debug: - emit_meta("prompt", chat_prompt.format(**prompt_input)) + emit_meta("generate_suggestions", { + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() if result is not None else None + }) + + if result is None: + return [] feedbacks = [] for feedback in result.feedbacks: diff --git a/module_text_llm/module_text_llm/helpers/llm_utils.py b/module_text_llm/module_text_llm/helpers/llm_utils.py index 1cfd646e6..394bdd2f7 100644 --- a/module_text_llm/module_text_llm/helpers/llm_utils.py +++ b/module_text_llm/module_text_llm/helpers/llm_utils.py @@ -1,6 +1,5 @@ -from typing import Type, TypeVar, List -from pydantic import BaseModel - +from typing import Optional, Type, TypeVar, List +from pydantic import BaseModel, ValidationError import tiktoken from langchain.chains import LLMChain @@ -11,12 +10,12 @@ SystemMessagePromptTemplate, HumanMessagePromptTemplate, ) -from langchain.output_parsers import PydanticOutputParser, OutputFixingParser from langchain.chains.openai_functions import create_structured_output_chain +from langchain.output_parsers import PydanticOutputParser +from langchain.schema import OutputParserException from athena import emit_meta - T = TypeVar("T", bound=BaseModel) @@ -115,7 +114,7 @@ def get_chat_prompt_with_formatting_instructions( return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) -def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]): +async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: """Predicts and parses the output of the model Args: @@ -123,12 +122,23 @@ def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, chat_prompt (ChatPromptTemplate): Prompt to use prompt_input (dict): Input parameters to use for the prompt pydantic_object (Type[T]): Pydantic model to parse the output + + Returns: + Optional[T]: Parsed output, or None if it could not be parsed """ if supports_function_calling(model): chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt) - return chain.run(**prompt_input) - - output_parser = OutputFixingParser.from_llm(parser=PydanticOutputParser(pydantic_object=pydantic_object), llm=model) - chain = LLMChain(llm=model, prompt=chat_prompt) - output = chain.run(**prompt_input) - return output_parser.parse(output) + + try: + return await chain.arun(**prompt_input) + except 
(OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None + + output_parser = PydanticOutputParser(pydantic_object=pydantic_object) + chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser) + try: + return await chain.arun(**prompt_input) + except (OutputParserException, ValidationError): + # In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt) + return None From 67101e87d24c2f3c8a916e9d8ed28e7751420510 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 12:57:57 +0200 Subject: [PATCH 43/51] improve parsing --- .../module_programming_llm/helpers/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 394bdd2f7..82db286f2 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -110,7 +110,7 @@ def get_chat_prompt_with_formatting_instructions( system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON Response:") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON response following the provided schema:") return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) From 4c077af40742906753dd43e99387f90b1ebbf87b Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 20 Aug 2023 12:59:42 +0200 Subject: [PATCH 44/51] add new line --- .../module_programming_llm/helpers/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index 82db286f2..ab15edc60 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -110,7 +110,7 @@ def get_chat_prompt_with_formatting_instructions( system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}") system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()} system_message_prompt.prompt.input_variables.remove("format_instructions") - human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\nJSON response following the provided schema:") + human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\n\nJSON response following the provided schema:") return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) From 2e96edfcfa2562369e3545d9414915315478611c Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
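The prompt helper touched in the last two patches injects the parser's format instructions into the system message and ends the human message with the new "JSON response following the provided schema:" cue. A condensed sketch of that construction for a simplified Assessment model; the real helper builds this from the configured system and human messages in llm_utils:

from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

class Assessment(BaseModel):
    summary: str = Field(description="One-sentence assessment summary")

output_parser = PydanticOutputParser(pydantic_object=Assessment)

# The schema instructions are pre-filled as a partial variable, so they are not
# treated as user input when the prompt is formatted later.
system_message_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        template="You are an AI tutor.\n{format_instructions}",
        input_variables=[],
        partial_variables={"format_instructions": output_parser.get_format_instructions()},
    )
)
human_message_prompt = HumanMessagePromptTemplate.from_template(
    "Submission:\n{submission}\n\nJSON response following the provided schema:"
)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])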
Dietrich" Date: Sun, 20 Aug 2023 13:11:49 +0200 Subject: [PATCH 45/51] fix type issue --- .../module_programming_llm/generate_suggestions_by_file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 152fb6660..56651afd6 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -218,6 +218,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio feedbacks: List[Feedback] = [] for prompt_input, result in zip(prompt_inputs, results): file_path = prompt_input["file_path"] + if result is None: + continue for feedback in result.feedbacks: feedbacks.append(Feedback( exercise_id=exercise.id, From ec4d36ebf2506fc2c21b4f059c941503bee0c920 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 27 Aug 2023 23:19:53 +0200 Subject: [PATCH 46/51] add prospector fixes --- .../module_programming_llm/generate_suggestions_by_file.py | 2 +- .../module_programming_llm/helpers/models/__init__.py | 6 ++++-- .../module_programming_llm/helpers/models/replicate.py | 7 ++++++- .../split_grading_instructions_by_file.py | 2 +- .../split_problem_statement_by_file.py | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 56651afd6..160c72471 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -45,7 +45,7 @@ class Config: # pylint: disable=too-many-locals async def generate_suggestions_by_file(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]: - model = config.model.get_model() + model = config.model.get_model() # type: ignore[attr-defined] chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, diff --git a/module_programming_llm/module_programming_llm/helpers/models/__init__.py b/module_programming_llm/module_programming_llm/helpers/models/__init__.py index f5ab68a2f..f77d791cd 100644 --- a/module_programming_llm/module_programming_llm/helpers/models/__init__.py +++ b/module_programming_llm/module_programming_llm/helpers/models/__init__.py @@ -30,7 +30,9 @@ if 'DefaultModelConfig' not in globals(): DefaultModelConfig = types[0] +type0 = types[0] if len(types) == 1: - ModelConfigType = types[0] + ModelConfigType = type0 else: - ModelConfigType = Union[tuple(types)] # type: ignore + type1 = types[1] + ModelConfigType = Union[type0, type1] # type: ignore diff --git a/module_programming_llm/module_programming_llm/helpers/models/replicate.py b/module_programming_llm/module_programming_llm/helpers/models/replicate.py index a706b8247..af2e30fa5 100644 --- a/module_programming_llm/module_programming_llm/helpers/models/replicate.py +++ b/module_programming_llm/module_programming_llm/helpers/models/replicate.py @@ -10,6 +10,11 @@ # Hardcoded list of models +# If necessary, add more models from replicate here, the config below might need adjustments depending on the available +# parameters of the model +# +# To update the version of the models, go to the respective page on replicate.com and copy the (latest) version id +# from and paste it 
after the colon in the value of the dictionary. Ever so often a new version is released. replicate_models = { # LLAMA 2 70B Chat # https://replicate.com/replicate/llama-2-70b-chat @@ -23,7 +28,7 @@ } available_models = {} -if len(os.environ.get("REPLICATE_API_TOKEN") or "") > 0: +if os.environ.get("REPLICATE_API_TOKEN"): # If Replicate is available available_models = { name: Replicate( model=model, diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 4ba10cb54..695850ec6 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -56,7 +56,7 @@ async def split_grading_instructions_by_file( if "grading_instructions" not in prompt.input_variables: return None - model = config.model.get_model() + model = config.model.get_model() # type: ignore[attr-defined] template_repo = exercise.get_template_repository() solution_repo = exercise.get_solution_repository() diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 8892685fa..ae90443a8 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -55,7 +55,7 @@ async def split_problem_statement_by_file( if "problem_statement" not in prompt.input_variables: return None - model = config.model.get_model() + model = config.model.get_model() # type: ignore[attr-defined] template_repo = exercise.get_template_repository() solution_repo = exercise.get_solution_repository() From 64a72a3d74372d210432d7f4c95944015d3adea5 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 27 Aug 2023 23:23:50 +0200 Subject: [PATCH 47/51] fix playground ui --- .../src/components/selectors/module_config_select/index.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/playground/src/components/selectors/module_config_select/index.tsx b/playground/src/components/selectors/module_config_select/index.tsx index 9434260a1..765088b1c 100644 --- a/playground/src/components/selectors/module_config_select/index.tsx +++ b/playground/src/components/selectors/module_config_select/index.tsx @@ -22,11 +22,12 @@ import ModuleLLMConfig from "./module_llm"; // 4. Add your component to the customModuleConfigComponents object // // Use ModuleLLMConfig as example. -type CustomModuleConfig = "module_text_llm"; +type CustomModuleConfig = "module_text_llm" | "module_programming_llm"; const customModuleConfigComponents: { [key in CustomModuleConfig]: React.FC; } = { module_text_llm: ModuleLLMConfig, + module_programming_llm: ModuleLLMConfig, }; type SetConfig = Dispatch>; From 2d710fd202f6c4fd47dc967d0ebf48b83d40163d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 27 Aug 2023 23:38:38 +0200 Subject: [PATCH 48/51] add problem statement fixes --- .../module_programming_llm/generate_suggestions_by_file.py | 5 +++-- .../split_problem_statement_by_file.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 160c72471..34fd4416b 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -60,7 +60,8 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio split_grading_instructions_by_file(exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug) ) - is_short_problem_statement = num_tokens_from_string(exercise.problem_statement) <= config.split_problem_statement_by_file_prompt.tokens_before_split + problem_statement_tokens = num_tokens_from_string(exercise.problem_statement or "") + is_short_problem_statement = problem_statement_tokens <= config.split_problem_statement_by_file_prompt.tokens_before_split file_problem_statements = { item.file_name: item.problem_statement for item in split_problem_statement.items @@ -101,7 +102,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio for file_path, file_content in changed_files.items(): problem_statement = ( - exercise.problem_statement if is_short_problem_statement + exercise.problem_statement or "" if is_short_problem_statement else file_problem_statements.get(file_path, "No relevant problem statement section found.") ) problem_statement = problem_statement if problem_statement.strip() else "No problem statement found." diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index ae90443a8..7aee29e4a 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -48,7 +48,7 @@ async def split_problem_statement_by_file( """ # Return None if the problem statement is too short - if num_tokens_from_string(exercise.problem_statement or "No problem statement.") <= config.split_problem_statement_by_file_prompt.tokens_before_split: + if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_by_file_prompt.tokens_before_split: return None # Return None if the problem statement not in the prompt @@ -83,7 +83,7 @@ async def split_problem_statement_by_file( ) prompt_input = { - "problem_statement": exercise.problem_statement, + "problem_statement": exercise.problem_statement or "No problem statement.", "changed_files_from_template_to_solution": ", ".join(changed_files_from_template_to_solution), "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } From 5c922c8714e84bdb1b0c2417f6782109afbc5b3b Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 3 Sep 2023 19:31:34 +0200 Subject: [PATCH 49/51] add codellama --- .../module_programming_llm/helpers/models/replicate.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/module_programming_llm/module_programming_llm/helpers/models/replicate.py b/module_programming_llm/module_programming_llm/helpers/models/replicate.py index af2e30fa5..15eef0216 100644 --- a/module_programming_llm/module_programming_llm/helpers/models/replicate.py +++ b/module_programming_llm/module_programming_llm/helpers/models/replicate.py @@ -25,6 +25,12 @@ # LLaMA 2 7B Chat # https://replicate.com/a16z-infra/llama-2-7b-chat "llama-2-7b-chat": "a16z-infra/llama-2-7b-chat:7b0bfc9aff140d5b75bacbed23e91fd3c34b01a1e958d32132de6e0a19796e2c", + # CodeLLAMA 2 13B + # https://replicate.com/replicate/codellama-13b + "codellama-13b": "replicate/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db", + # CodeLLAMA 2 34B + # https://replicate.com/replicate/codellama-34b + "codellama-34b": "replicate/codellama-34b:0666717e5ead8557dff55ee8f11924b5c0309f5f1ca52f64bb8eec405fdb38a7", } available_models = {} From f0bdb8d25f40996bc3d1e0d8869bd6161391ed47 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Thu, 19 Oct 2023 16:18:01 +0200 Subject: [PATCH 50/51] implement feedback --- .../generate_suggestions_by_file.py | 10 +++++----- .../module_programming_llm/helpers/llm_utils.py | 2 +- .../split_grading_instructions_by_file.py | 6 +++--- .../split_problem_statement_by_file.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 34fd4416b..bcd3d0666 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -24,10 +24,10 @@ class FeedbackModel(BaseModel): - title: str = Field(..., description="Very short title, i.e. feedback category", example="Logic Error") - description: str = Field(..., description="Feedback description") - line_start: Optional[int] = Field(..., description="Referenced line number start, or empty if unreferenced") - line_end: Optional[int] = Field(..., description="Referenced line number end, or empty if unreferenced") + title: str = Field(description="Very short title, i.e. 
feedback category", example="Logic Error") + description: str = Field(description="Feedback description") + line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced") + line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced") credits: float = Field(0.0, description="Number of points received/deducted") class Config: @@ -37,7 +37,7 @@ class Config: class AssessmentModel(BaseModel): """Collection of feedbacks making up an assessment""" - feedbacks: Sequence[FeedbackModel] = Field(..., description="Assessment feedbacks") + feedbacks: Sequence[FeedbackModel] = Field(description="Assessment feedbacks") class Config: title = "Assessment" diff --git a/module_programming_llm/module_programming_llm/helpers/llm_utils.py b/module_programming_llm/module_programming_llm/helpers/llm_utils.py index ab15edc60..5faad8b63 100644 --- a/module_programming_llm/module_programming_llm/helpers/llm_utils.py +++ b/module_programming_llm/module_programming_llm/helpers/llm_utils.py @@ -115,7 +115,7 @@ def get_chat_prompt_with_formatting_instructions( async def predict_and_parse(model: BaseLanguageModel, chat_prompt: ChatPromptTemplate, prompt_input: dict, pydantic_object: Type[T]) -> Optional[T]: - """Predicts and parses the output of the model + """Predicts an LLM completion using the model and parses the output using the provided Pydantic model Args: model (BaseLanguageModel): The model to predict with diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py index 695850ec6..a8ef00fb1 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py @@ -18,13 +18,13 @@ class FileGradingInstruction(BaseModel): - file_name: str = Field(..., description="File name") - grading_instructions: str = Field(..., description="Grading instructions relevant for this file") + file_name: str = Field(description="File name") + grading_instructions: str = Field(description="Grading instructions relevant for this file") class SplitGradingInstructions(BaseModel): """Collection of grading instructions split by file""" - items: Sequence[FileGradingInstruction] = Field(..., description="File grading instructions") + items: Sequence[FileGradingInstruction] = Field(description="File grading instructions") # pylint: disable=too-many-locals diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py index 7aee29e4a..a877f8c77 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py @@ -18,13 +18,13 @@ class FileProblemStatement(BaseModel): - file_name: str = Field(..., description="File name") - problem_statement: str = Field(..., description="Problem statement relevant for this file") + file_name: str = Field(description="File name") + problem_statement: str = Field(description="Problem statement relevant for this file") class SplitProblemStatement(BaseModel): """Collection of problem statements split by file""" - items: Sequence[FileProblemStatement] = Field(..., description="File problem statements") + items: Sequence[FileProblemStatement] = Field(description="File 
problem statements") # pylint: disable=too-many-locals From 70b8f407d0d891a525784d9745625dbc84b32673 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Fri, 20 Oct 2023 10:19:29 +0200 Subject: [PATCH 51/51] implement feedback --- .../module_programming_llm/generate_suggestions_by_file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index bcd3d0666..2d646026f 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -100,6 +100,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio file_filter=lambda file_path: file_path in changed_files_from_template_to_submission ) + # Gather prompt inputs for each changed file (independently) for file_path, file_content in changed_files.items(): problem_statement = ( exercise.problem_statement or "" if is_short_problem_statement @@ -150,6 +151,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio }) # Filter long prompts (omitting features if necessary) + # Lowest priority features are at the top of the list (i.e. they are omitted first if necessary) # "submission_file" is not omittable, because it is the main input containing the line numbers # In the future we might be able to include the line numbers in the diff, but for now we need to keep it omittable_features = [