Skip to content

Commit 064fca5

Browse files
Packaging, optional dependencies (#11)
- Use pyproject.toml to define the package. - Add optional dependencies: Haystack, DeepEval (+ LangChain), and docs generation. - Update the README with installation instructions.
1 parent 1e34ee0 commit 064fca5

File tree

10 files changed

+86
-48
lines changed

10 files changed

+86
-48
lines changed

README.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,16 @@ files.
1515
Comments, suggestions, issues and pull-requests are welcomed! ❤️
1616

1717
### Installation
18-
Clone locally and run:
18+
Clone and run:
1919

2020
```sh
21-
pip install -r requirements.txt
21+
pip install -e .
22+
```
23+
24+
Optional packages can be installed:
25+
```sh
26+
pip install -e .[haystack]
27+
pip install -e .[deepeval]
2228
```
2329

2430
### Quick Start

docs/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
Install python packages required for building mkdocs documentation website.
66

77
``` sh
8-
pip install -r docs/requirements.txt
8+
pip install -e .[docs]
99
```
1010

1111
## Adding new content

docs/index.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,16 @@ files.
1515
Comments, suggestions, issues and pull-requests are welcomed! ❤️
1616

1717
### Installation
18-
Clone locally and run:
18+
Clone and run:
1919

2020
```sh
21-
pip install -r requirements.txt
21+
pip install -e .
22+
```
23+
24+
Optional packages can be installed:
25+
```sh
26+
pip install -e .[haystack]
27+
pip install -e .[deepeval]
2228
```
2329

2430
### Quick Start

docs/requirements.txt

-7
This file was deleted.

pyproject.toml

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
[project]
2+
name = "ragfoundry"
3+
version = "1.1.1"
4+
description = "Framework for enhancing LLMs for RAG tasks using fine-tuning."
5+
readme = "README.md"
6+
license = {file = "LICENSE"}
7+
requires-python = ">=3.10"
8+
dependencies = [
9+
"bert-score>=0.3.13",
10+
"bitsandbytes==0.42.0",
11+
"datasets==2.16.1",
12+
"evaluate==0.4.1",
13+
"hydra-core==1.3.2",
14+
"nltk==3.9",
15+
"openai==1.23.3",
16+
"peft==0.11.1",
17+
"pyyaml==6.0.1",
18+
"rouge-score==0.1.2",
19+
"sentence-transformers==2.4.0",
20+
"sentencepiece==0.2.0",
21+
"torch==2.2.0",
22+
"transformers==4.42.3",
23+
"trl==0.8.6",
24+
"wandb==0.16.4",
25+
]
26+
27+
[project.urls]
28+
Homepage = "https://github.com/IntelLabs/RAGFoundry"
29+
Documentation = "https://intellabs.github.io/RAGFoundry/"
30+
31+
[tool.setuptools]
32+
packages = ["ragfoundry"]
33+
34+
[project.optional-dependencies]
35+
deepeval = [
36+
"deepeval==0.21.73",
37+
]
38+
haystack = [
39+
"haystack-ai==2.3.1",
40+
"qdrant-haystack>=5.0.0",
41+
]
42+
docs = [
43+
"mkdocs-gen-files>=0.5.0",
44+
"mkdocs-material-extensions>=1.3.1",
45+
"mkdocs-material>=9.5.30",
46+
"mkdocstrings-python-legacy>=0.2.3",
47+
"mkdocstrings-python>=1.10.7",
48+
"mkdocstrings>=0.25.2",
49+
"pymdown-extensions>=10.9",
50+
]
51+

ragfoundry/evaluation/deep.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
import logging
22
import math
33

4-
from deepeval.test_case import LLMTestCase
5-
from langchain_openai import AzureChatOpenAI
6-
74
from .base import MetricBase
85

96

@@ -23,9 +20,13 @@ def __init__(
2320
**kwargs,
2421
):
2522
super().__init__(key_names, **kwargs)
23+
from deepeval.test_case import LLMTestCase
24+
from langchain_openai import AzureChatOpenAI
25+
2626
self.local = True
2727
self.query = self.key_names["query"]
2828
self.context = self.key_names["context"]
29+
self.test_case = LLMTestCase
2930

3031
self.model = AzureChatOpenAI(
3132
api_version=api_version,
@@ -54,7 +55,7 @@ def measure(self, example):
5455
output = example[self.field]
5556
context = example[self.context]
5657

57-
test_case = LLMTestCase(
58+
test_case = self.test_case(
5859
input=query,
5960
actual_output=output or "No answer.",
6061
retrieval_context=[context] if isinstance(context, str) else context,
@@ -92,7 +93,7 @@ def measure(self, example):
9293
output = example[self.field]
9394
context = example[self.context]
9495

95-
test_case = LLMTestCase(
96+
test_case = self.test_case(
9697
input=query,
9798
actual_output=output or "No answer.",
9899
retrieval_context=[context] if isinstance(context, str) else context,
@@ -126,7 +127,7 @@ def measure(self, example):
126127
output = example[self.field]
127128
context = example[self.context]
128129

129-
test_case = LLMTestCase(
130+
test_case = self.test_case(
130131
input="",
131132
actual_output=output,
132133
context=[context] if isinstance(context, str) else context,

ragfoundry/evaluation/metrics.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
import string
33
from collections import Counter, defaultdict
44

5-
import evaluate
6-
import sklearn
7-
85
from .base import MetricBase
96

107

@@ -19,6 +16,8 @@ def __init__(self, key_names, metric_names: list[str], **kwargs):
1916
key_names (dict): A dictionary containing the field names.
2017
metric_names (list[str]): A list of metric names.
2118
"""
19+
import evaluate
20+
2221
super().__init__(key_names, **kwargs)
2322
self.metric_names = metric_names
2423
self.metric = evaluate.combine(metric_names)
@@ -63,10 +62,14 @@ class Classification(MetricBase):
6362
def __init__(
6463
self, key_names: dict, mapping: dict, else_value: int = 2, **kwargs
6564
) -> None:
65+
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
66+
6667
super().__init__(key_names, **kwargs)
6768
self.local = False
6869
self.mapping = mapping
6970
self.else_value = else_value
71+
self.precision_recall_fn = precision_recall_fscore_support
72+
self.accuracy_fn = accuracy_score
7073

7174
def measure(self, example: dict):
7275
inputs = example[self.field]
@@ -83,10 +86,10 @@ def measure(self, example: dict):
8386
self.mapping.get(normalize_text(t).strip(), self.else_value) for t in targets
8487
]
8588

86-
precision, recall, f1, _ = sklearn.metrics.precision_recall_fscore_support(
89+
precision, recall, f1, _ = self.precision_recall_fn(
8790
targets, inputs, average="macro"
8891
)
89-
accuracy = sklearn.metrics.accuracy_score(targets, inputs)
92+
accuracy = self.accuracy_fn(targets, inputs)
9093

9194
return {
9295
"accuracy": float(accuracy),

ragfoundry/processing/global_steps/sampling.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,5 +101,5 @@ def __init__(self, k, output_key="fewshot", input_dataset=None, **kwargs):
101101
output_key=output_key,
102102
input_key=None,
103103
input_dataset=input_dataset,
104-
**kwargs
104+
**kwargs,
105105
)

ragfoundry/processing/local_steps/retrievers/haystack.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from haystack import Pipeline
2-
31
from ...step import LocalStep
42

53

@@ -10,6 +8,8 @@ class HaystackRetriever(LocalStep):
108

119
def __init__(self, pipeline_or_yaml_path, docs_key, query_key, **kwargs):
1210
super().__init__(**kwargs)
11+
from haystack import Pipeline
12+
1313
if isinstance(pipeline_or_yaml_path, str):
1414
self.pipe = Pipeline.load(open(pipeline_or_yaml_path))
1515
else:

requirements.txt

-22
This file was deleted.

0 commit comments

Comments (0)