Skip to content

Commit 059dc48

Browse files
authored
add num_docs (#62)
* add num_docs add num_docs
* Update test_inmemory_vectordb.py
* Update test_hnswlib_vectordb.py
1 parent a8531f6 commit 059dc48

File tree

5 files changed: 20 additions and 3 deletions

tests/unit/test_hnswlib_vectordb.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -169,4 +169,11 @@ def test_hnswlib_vectordb_restore(docs_to_index, tmpdir):
169169
assert len(res.matches) == 10
170170
# assert res.id == res.matches[0].id
171171
# assert res.text == res.matches[0].text
172-
# assert res.scores[0] < 0.001 # some precision issues, should be 0
172+
# assert res.scores[0] < 0.001 # some precision issues, should be 0
173+
174+
def test_hnswlib_num_dos(tmpdir):
175+
db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
176+
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
177+
db.index(inputs=DocList[MyDoc](doc_list))
178+
x=db.num_docs()
179+
assert x['num_docs']==1000

tests/unit/test_inmemory_vectordb.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -172,3 +172,10 @@ def test_inmemory_vectordb_restore(docs_to_index, tmpdir):
172172
assert res.id == res.matches[0].id
173173
assert res.text == res.matches[0].text
174174
assert res.scores[0] > 0.99 # some precision issues, should be 1
175+
176+
def test_inmemory_num_dos(tmpdir):
177+
db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
178+
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
179+
db.index(inputs=DocList[MyDoc](doc_list))
180+
x=db.num_docs()
181+
assert x['num_docs']==1000

vectordb/db/base.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,9 @@ async def _deploy():
227227
ret = asyncio.run(_deploy())
228228
return ret
229229

230+
def num_docs(self, **kwargs):
231+
return self._executor.num_docs()
232+
230233
@pass_kwargs_as_params
231234
@unify_input_output
232235
def index(self, docs: 'DocList[TSchema]', parameters: Optional[Dict] = None, **kwargs):

vectordb/db/executors/hnsw_indexer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ async def async_update(self, docs, *args, **kwargs):
105105
return self.update(docs, *args, **kwargs)
106106

107107
def num_docs(self, **kwargs):
108-
return {'num_docs': self._index.num_docs()}
108+
return {'num_docs': self._indexer.num_docs()}
109109

110110
def snapshot(self, snapshot_dir):
111111
# TODO: Maybe copy the work_dir to workspace if `handle` is False

vectordb/db/executors/inmemory_exact_indexer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ def update(self, docs, *args, **kwargs):
7171
return self._index(docs)
7272

7373
def num_docs(self, *args, **kwargs):
74-
return {'num_docs': self._index.num_docs()}
74+
return {'num_docs': self._indexer.num_docs()}
7575

7676
def snapshot(self, snapshot_dir):
7777
snapshot_file = f'{snapshot_dir}/index.bin'

0 commit comments

Comments
 (0)