Skip to content

Commit addd19f

Browse files
authored
Merge pull request #14 from AstraBert/v1.0.1-branch
v1.0.1 bugfixes
2 parents: 423a0fc + 17210ee; commit: addd19f

File tree

3 files changed: 25 additions and 21 deletions.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "sentrev"
7-
version = "1.0.0"
7+
version = "1.0.1"
88
authors = [
99
{ name="Clelia (Astra) Bertelli", email="[email protected]" },
1010
]

src/sentrev/evaluator.py

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -383,13 +383,15 @@ def evaluate_dense_retrieval(
383383
client.delete_collection(collection_name)
384384
performances_df = pd.DataFrame.from_dict(performances)
385385
performances_df.to_csv(csv_path, index=False)
386+
csv_name = os.path.basename(csv_path)
387+
csv_path_base = os.path.dirname(csv_path)
386388
if plot:
387-
path_time = csv_path.split(".")[0] + "_times.png"
388-
path_sr = csv_path.split(".")[0] + "_success_rate.png"
389-
path_mrr = csv_path.split(".")[0] + "_mrr.png"
390-
path_co2 = csv_path.split(".")[0] + "_co2.png"
391-
path_precision = csv_path.split(".")[0] + "_precision.png"
392-
path_nonrelevant = csv_path.split(".")[0] + "_nonrelevant.png"
389+
path_time = csv_path_base + "/" + csv_name.split(".")[0] + "_times.png"
390+
path_sr = csv_path_base + "/" + csv_name.split(".")[0] + "_success_rate.png"
391+
path_mrr = csv_path_base + "/" + csv_name.split(".")[0] + "_mrr.png"
392+
path_co2 = csv_path_base + "/" + csv_name.split(".")[0] + "_co2.png"
393+
path_precision = csv_path_base + "/" + csv_name.split(".")[0] + "_precision.png"
394+
path_nonrelevant = csv_path_base + "/" + csv_name.split(".")[0] + "_nonrelevant.png"
393395
X = performances["encoder"]
394396
y_times = performances["average_time"]
395397
yerr_times = performances["stdev_time"]
@@ -747,13 +749,15 @@ def evaluate_sparse_retrieval(
747749
client.delete_collection(collection_name)
748750
performances_df = pd.DataFrame.from_dict(performances)
749751
performances_df.to_csv(csv_path, index=False)
752+
csv_name = os.path.basename(csv_path)
753+
csv_path_base = os.path.dirname(csv_path)
750754
if plot:
751-
path_time = csv_path.split(".")[0] + "_times.png"
752-
path_sr = csv_path.split(".")[0] + "_success_rate.png"
753-
path_mrr = csv_path.split(".")[0] + "_mrr.png"
754-
path_co2 = csv_path.split(".")[0] + "_co2.png"
755-
path_precision = csv_path.split(".")[0] + "_precision.png"
756-
path_nonrelevant = csv_path.split(".")[0] + "_nonrelevant.png"
755+
path_time = csv_path_base + "/" + csv_name.split(".")[0] + "_times.png"
756+
path_sr = csv_path_base + "/" + csv_name.split(".")[0] + "_success_rate.png"
757+
path_mrr = csv_path_base + "/" + csv_name.split(".")[0] + "_mrr.png"
758+
path_co2 = csv_path_base + "/" + csv_name.split(".")[0] + "_co2.png"
759+
path_precision = csv_path_base + "/" + csv_name.split(".")[0] + "_precision.png"
760+
path_nonrelevant = csv_path_base + "/" + csv_name.split(".")[0] + "_nonrelevant.png"
757761
X = performances["encoder"]
758762
y_times = performances["average_time"]
759763
yerr_times = performances["stdev_time"]

src/sentrev/utils.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -195,14 +195,14 @@ def collect_data(self):
195195
for text in self.pages:
196196
contents = text.page_content.split("\n")
197197
contents = remove_items(contents, "")
198-
for content in contents:
199-
self.documents.append(
200-
{
201-
"text": content,
202-
"source": text.metadata["source"],
203-
"page": str(text.metadata["page"]),
204-
}
205-
)
198+
content = "\n".join(contents)
199+
self.documents.append(
200+
{
201+
"text": content,
202+
"source": text.metadata["source"],
203+
"page": str(text.metadata["page"]),
204+
}
205+
)
206206
return self.documents
207207

208208
def qdrant_collection_and_upload(self):

0 commit comments

Comments
 (0)