Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions chromadb/utils/batch_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+import random
 from typing import Optional, Tuple, List
 from chromadb.api import BaseAPI
 from chromadb.api.types import (
Expand All @@ -18,19 +19,22 @@ def create_batches(
     _batches: List[
         Tuple[IDs, Embeddings, Optional[Metadatas], Optional[Documents]]
     ] = []
-    if len(ids) > api.get_max_batch_size():
-        # create split batches
-        for i in range(0, len(ids), api.get_max_batch_size()):
+    max_batch_size = api.get_max_batch_size()
+    offset = 0
+    if len(ids) > max_batch_size:
+        while offset < len(ids):
+            batch_size = random.randint(1, max_batch_size)
             _batches.append(
                 ( # type: ignore
-                    ids[i : i + api.get_max_batch_size()],
-                    embeddings[i : i + api.get_max_batch_size()]
+                    ids[offset : offset + batch_size],
+                    embeddings[offset : offset + batch_size]
                     if embeddings
                     else None,
-                    metadatas[i : i + api.get_max_batch_size()] if metadatas else None,
-                    documents[i : i + api.get_max_batch_size()] if documents else None,
+                    metadatas[offset : offset + batch_size] if metadatas else None,
+                    documents[offset : offset + batch_size] if documents else None,
                 )
             )
+            offset += batch_size
     else:
         _batches.append((ids, embeddings, metadatas, documents)) # type: ignore
     return _batches
Loading