Skip to content

Commit aadc298

Browse files
committed
fix: implement score_threshold filtering for OceanBase vector search
OceanBase vector search ignored the `score_threshold` parameter and returned all `top_k` results regardless of their similarity scores. As a result, annotation queries with a high threshold (e.g., `score_threshold=1`) returned low-scoring matches that should have been filtered out. This commit reads `score_threshold` from the search kwargs (falling back to 0.0 when it is absent or None) and keeps only results whose similarity score, computed as `1 - distance / sqrt(2)`, is greater than or equal to the threshold, bringing OceanBase behavior in line with other vector stores (Chroma, PGVector, etc.).
1 parent 1dfde24 commit aadc298

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc
270270
self._client.set_ob_hnsw_ef_search(ef_search)
271271
self._hnsw_ef_search = ef_search
272272
topk = kwargs.get("top_k", 10)
273+
score_threshold = float(kwargs.get("score_threshold") or 0.0)
273274
try:
274275
cur = self._client.ann_search(
275276
table_name=self._collection_name,
@@ -286,13 +287,15 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc
286287
docs = []
287288
for _text, metadata, distance in cur:
288289
metadata = json.loads(metadata)
289-
metadata["score"] = 1 - distance / math.sqrt(2)
290-
docs.append(
291-
Document(
292-
page_content=_text,
293-
metadata=metadata,
290+
score = 1 - distance / math.sqrt(2)
291+
metadata["score"] = score
292+
if score >= score_threshold:
293+
docs.append(
294+
Document(
295+
page_content=_text,
296+
metadata=metadata,
297+
)
294298
)
295-
)
296299
return docs
297300

298301
def delete(self):

0 commit comments

Comments
 (0)