From c7966e3614417ed41585a26cc07502d7804ec3b5 Mon Sep 17 00:00:00 2001 From: Robert Escriva Date: Thu, 5 Jun 2025 18:09:44 -0700 Subject: [PATCH 1/3] [TST] Make random batch sizes in prop tests --- chromadb/utils/batch_utils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/chromadb/utils/batch_utils.py b/chromadb/utils/batch_utils.py index f3881bc6a03..918c5b003b6 100644 --- a/chromadb/utils/batch_utils.py +++ b/chromadb/utils/batch_utils.py @@ -18,19 +18,22 @@ def create_batches( _batches: List[ Tuple[IDs, Embeddings, Optional[Metadatas], Optional[Documents]] ] = [] - if len(ids) > api.get_max_batch_size(): - # create split batches - for i in range(0, len(ids), api.get_max_batch_size()): + max_batch_size = api.get_max_batch_size() + offset = 0 + if len(ids) > max_batch_size: + while offset < len(ids): + batch_size = random.randint(1, max_batch_size): _batches.append( ( # type: ignore - ids[i : i + api.get_max_batch_size()], - embeddings[i : i + api.get_max_batch_size()] + ids[offset : offset + batch_size], + embeddings[offset : offset + batch_size] if embeddings else None, - metadatas[i : i + api.get_max_batch_size()] if metadatas else None, - documents[i : i + api.get_max_batch_size()] if documents else None, + metadatas[offset : offset + batch_size] if metadatas else None, + documents[offset : offset + batch_size] if documents else None, ) ) + offset += batch_size else: _batches.append((ids, embeddings, metadatas, documents)) # type: ignore return _batches From 360b5f0442b9ffca68de011e086cc3040b9625ed Mon Sep 17 00:00:00 2001 From: Robert Escriva Date: Fri, 6 Jun 2025 08:46:24 -0700 Subject: [PATCH 2/3] typo fix --- chromadb/utils/batch_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/utils/batch_utils.py b/chromadb/utils/batch_utils.py index 918c5b003b6..4a5f2acf742 100644 --- a/chromadb/utils/batch_utils.py +++ b/chromadb/utils/batch_utils.py @@ -22,7 +22,7 @@ def create_batches( offset = 0 if len(ids) > max_batch_size: while offset < len(ids): - batch_size = random.randint(1, max_batch_size): + batch_size = random.randint(1, max_batch_size) _batches.append( ( # type: ignore ids[offset : offset + batch_size], From 951312501c9a2eaa2a036972e048867bc65c3106 Mon Sep 17 00:00:00 2001 From: Robert Escriva Date: Mon, 9 Jun 2025 08:31:09 -0700 Subject: [PATCH 3/3] import random --- chromadb/utils/batch_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/utils/batch_utils.py b/chromadb/utils/batch_utils.py index 4a5f2acf742..9fa210cbcfb 100644 --- a/chromadb/utils/batch_utils.py +++ b/chromadb/utils/batch_utils.py @@ -1,3 +1,4 @@ +import random from typing import Optional, Tuple, List from chromadb.api import BaseAPI from chromadb.api.types import (