Metadata-Version: 2.2
Name: diragnosis
Version: 0.0.0.post1
Summary: diRAGnosis - Diagnose the performance of your RAG!
Author-email: Clelia Astra Bertelli <[email protected]>
Project-URL: Homepage, https://github.com/AstraBert/RAGnosis
Project-URL: Issues, https://github.com/AstraBert/RAGnosis/issues
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: llama-index
Requires-Dist: llama-index-embeddings-openai
Requires-Dist: llama-index-embeddings-huggingface
Requires-Dist: llama-index-embeddings-cohere
Requires-Dist: llama-index-embeddings-mistralai
Requires-Dist: llama-index-llms-openai
Requires-Dist: llama-index-llms-cohere
Requires-Dist: llama-index-llms-groq
Requires-Dist: llama-index-llms-mistralai
Requires-Dist: llama-index-llms-anthropic
Requires-Dist: llama-index-vector-stores-qdrant
Requires-Dist: fastembed
Requires-Dist: qdrant_client
Requires-Dist: pydantic
Requires-Dist: pandas

<h1 align="center">diRAGnosis🩺</h1>

<h2 align="center">Diagnose the performance of your RAG</h2>

<div align="center">
 <h3>If you find diRAGnosis useful, please consider donating to support the project:</h3>
 <a href="https://github.com/sponsors/AstraBert"><img src="https://img.shields.io/badge/sponsor-30363D?style=for-the-badge&logo=GitHub-Sponsors&logoColor=#EA4AAA" alt="GitHub Sponsors Badge"></a>
</div>
<br>
<div align="center">
 <img src="https://raw.githubusercontent.com/AstraBert/diRAGnosis/main/logo.png" alt="diRAGnosis Logo" width=300 height=300>
</div>

**diRAGnosis** is a lightweight framework, built with [LlamaIndex](https://llamaindex.ai), that lets you evaluate the performance of LLMs and retrieval models in RAG pipelines on your own documents. It can be used as an application (thanks to [FastAPI](https://fastapi.tiangolo.com/) + [Gradio](https://gradio.app)) running locally on your machine, or as a Python package.

## Installation and usage

### As an application

Clone the repository:

```bash
git clone https://github.com/AstraBert/diRAGnosis.git
cd diRAGnosis/
```

**Docker (recommended)**🐋

> _Required: [Docker](https://docs.docker.com/desktop/) and [docker compose](https://docs.docker.com/compose/)_

- Launch the Docker application:

```bash
# If you are on Linux/macOS
bash run_services.sh
# If you are on Windows
.\run_services.ps1
```

Or, if you prefer:

```bash
docker compose up db -d
docker compose up dashboard -d
```
You will see the application running on http://localhost:8000/dashboard and you will be able to use it. Depending on your connection and your hardware, the setup might take some time (up to 30 minutes) - but only the first time you run it!


**Source code**🗎

> _Required: [Docker](https://docs.docker.com/desktop/), [docker compose](https://docs.docker.com/compose/) and [conda](https://anaconda.org/anaconda/conda)_

- Set up the diRAGnosis app using the dedicated script:

```bash
# For macOS/Linux users
bash setup.sh
# For Windows users
.\setup.ps1
```

- Or you can do it manually, if you prefer:

```bash
docker compose up db -d

conda env create -f environment.yml

conda activate eval-framework

cd scripts/
uvicorn main:app --host 0.0.0.0 --port 8000

conda deactivate
```

You will see the application running on http://localhost:8000/dashboard and you will be able to use it.

### As a Python package

You can install diRAGnosis as a Python package using `pip`:

```bash
pip install diRAGnosis
```

Once you have installed it, you can import the four available functions ([detailed in the dedicated reference file](https://github.com/AstraBert/diRAGnosis/tree/main/REFERENCE.md)) like this:

```python
from diRAGnosis.evaluation import generate_question_dataset, evaluate_llms, evaluate_retrieval, display_available_providers
```
Once you have imported them, here is an example of how you can use them:

```python
from qdrant_client import QdrantClient, AsyncQdrantClient
import asyncio
import os
from dotenv import load_dotenv
import json

load_dotenv()
# import your API keys (in this case, only OpenAI)
openai_api_key = os.environ["OPENAI_API_KEY"]
# define your data
input_files = ["file1.pdf", "file2.pdf"]
# create a Qdrant client (synchronous and asynchronous)
qdrant_client = QdrantClient("http://localhost:6333")
qdrant_aclient = AsyncQdrantClient("http://localhost:6333")
# display available LLM and embedding model providers
display_available_providers()

async def main():
    # generate the synthetic question dataset
    question_dataset, docs = await generate_question_dataset(
        input_files=input_files, llm="OpenAI", model="gpt-4o-mini",
        api_key=openai_api_key, questions_per_chunk=10,
        save_to_csv="questions.csv", debug=True,
    )
    # evaluate LLM performance
    binary_pass, scores = await evaluate_llms(
        qc=qdrant_client, aqc=qdrant_aclient, llm="OpenAI", model="gpt-4o-mini",
        api_key=openai_api_key, docs=docs, questions=question_dataset,
        embedding_provider="HuggingFace",
        embedding_model="Alibaba-NLP/gte-modernbert-base",
        enable_hybrid=True, debug=True,
    )
    print(json.dumps(binary_pass, indent=4))
    print(json.dumps(scores, indent=4))
    # evaluate retrieval performance
    retrieval_metrics = await evaluate_retrieval(
        qc=qdrant_client, aqc=qdrant_aclient, input_files=input_files,
        llm="OpenAI", model="gpt-4o-mini", api_key=openai_api_key,
        embedding_provider="HuggingFace",
        embedding_model="Alibaba-NLP/gte-modernbert-base",
        questions_per_chunk=5, enable_hybrid=True, debug=True,
    )
    print(json.dumps(retrieval_metrics, indent=4))

if __name__ == "__main__":
    asyncio.run(main())
```

## How it works

<div align="center">
 <img src="https://raw.githubusercontent.com/AstraBert/diRAGnosis/main/workflow.png" alt="diRAGnosis Workflow">
</div>

diRAGnosis automates the evaluation of LLM and retrieval model performance on your documents:

- Once your documents are uploaded, an LLM of your choice converts them into a synthetic question dataset (either for Retrieval-Augmented Generation or for retrieval only)
- The documents are also chunked and uploaded to a vector database served by [Qdrant](https://qdrant.tech) - you can choose between semantic-only and hybrid search
- The LLMs are evaluated, with binary pass/fail judgments and with scores, on the faithfulness and relevancy of their answers, given the questions and the retrieved context associated with each question
- The retrieval model is evaluated on hit rate (whether the correct document is retrieved first) and MRR (Mean Reciprocal Rank, i.e. the position of the correct document in the ranking of the retrieved documents)
- The metrics are returned to the user
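
As a rough illustration of the two retrieval metrics above (this is not diRAGnosis's internal implementation - the function and variable names are hypothetical), hit rate and MRR over a set of queries can be computed like this:

```python
# Toy sketch of the retrieval metrics: each query has a ranked list of
# retrieved document IDs and one expected (correct) document ID.

def hit_rate(rankings: list[list[str]], expected: list[str]) -> float:
    """Fraction of queries whose expected document is ranked first."""
    hits = sum(1 for ranked, exp in zip(rankings, expected) if ranked and ranked[0] == exp)
    return hits / len(expected)

def mean_reciprocal_rank(rankings: list[list[str]], expected: list[str]) -> float:
    """Average of 1/rank of the expected document (0 if it was not retrieved)."""
    total = 0.0
    for ranked, exp in zip(rankings, expected):
        if exp in ranked:
            total += 1.0 / (ranked.index(exp) + 1)
    return total / len(expected)

# Two queries: the first retrieves the right document first,
# the second ranks it third
rankings = [["doc_a", "doc_b"], ["doc_c", "doc_d", "doc_b"]]
expected = ["doc_a", "doc_b"]
print(hit_rate(rankings, expected))              # 0.5
print(mean_reciprocal_rank(rankings, expected))  # (1 + 1/3) / 2 ≈ 0.667
```

Both metrics range from 0 to 1; a higher MRR means the correct document tends to sit closer to the top of the retrieved ranking.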

## Contributing

Contributions are always welcome! Follow the contribution guidelines reported [here](https://github.com/AstraBert/diRAGnosis/tree/main/CONTRIBUTING.md).

## License and rights of usage

The software is provided under the MIT [license](https://github.com/AstraBert/diRAGnosis/tree/main/LICENSE).