Skip to content

Commit 78ccdfb

Browse files
committed
Use linger_ms to delay batches
Currently, the linger_ms parameter is used to limit the number of requests sent to a given broker, by adding at minimum linger_ms between two produce requests. It doesn't produce the expected result when the max size of batches is also reached. The purpose of linger_ms should not be to limit the rate of requests, but instead to keep a batch open long enough that it has the opportunity to grow bigger. Moving linger_ms to the message accumulator (like in the Java client) makes it possible to have a condition on both the max size and the lingering time. fixes #1137
1 parent 7b7c4ff commit 78ccdfb

File tree

6 files changed

+57
-17
lines changed

6 files changed

+57
-17
lines changed

CHANGES.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ Bugfixes:
2525
* Ensure the transaction coordinator is refreshed after broker fail‑over,
2626
so transactional producers resume once a new coordinator is elected.
2727
(pr #1135 by @vmaurin)
28-
28+
* Properly manage batch max size and linger_ms. A batch will always be
29+
produced if the max size or the lingering time is reached
30+
(pr #1142 by @vmaurin)
2931

3032
Misc:
3133
* Use SPDX license expression for project metadata.

aiokafka/producer/message_accumulator.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,12 +129,14 @@ def record_count(self):
129129

130130

131131
class MessageBatch:
132-
"""This class incapsulate operations with batch of produce messages"""
132+
"""This class encapsulate operations with batch of produce messages"""
133133

134-
def __init__(self, tp, builder, ttl):
134+
def __init__(self, tp, builder, ttl, linger_time, max_size):
135135
self._builder = builder
136136
self._tp = tp
137137
self._ttl = ttl
138+
self._linger_time = linger_time
139+
self._max_size = max_size
138140
self._ctime = time.monotonic()
139141

140142
# Waiters
@@ -266,6 +268,13 @@ async def wait_drain(self, timeout=None):
266268
if waiter.done():
267269
waiter.result() # Check for exception
268270

271+
def ready(self):
272+
"""Check that batch is ready or not"""
273+
return (
274+
self._builder.record_count() >= self._max_size
275+
or (time.monotonic() - self._ctime) >= self._linger_time
276+
)
277+
269278
def expired(self):
270279
"""Check that batch is expired or not"""
271280
return (time.monotonic() - self._ctime) > self._ttl
@@ -312,6 +321,7 @@ def __init__(
312321
*,
313322
txn_manager=None,
314323
loop=None,
324+
linger_ms=0,
315325
):
316326
if loop is None:
317327
loop = get_running_loop()
@@ -325,6 +335,7 @@ def __init__(
325335
self._wait_data_future = loop.create_future()
326336
self._closed = False
327337
self._txn_manager = txn_manager
338+
self._linger_time = linger_ms / 1000
328339

329340
self._exception = None # Critical exception
330341

@@ -466,6 +477,9 @@ def drain_by_nodes(self, ignore_nodes, muted_partitions=frozenset()):
466477
continue
467478
elif ignore_nodes and leader in ignore_nodes:
468479
continue
480+
elif not self._batches[tp][0].ready():
481+
# batch should still linger
482+
continue
469483

470484
batch = self._pop_batch(tp)
471485
# We can get an empty batch here if all `append()` calls failed
@@ -506,7 +520,9 @@ def _append_batch(self, builder, tp):
506520
if self._txn_manager is not None:
507521
self._txn_manager.maybe_add_partition_to_txn(tp)
508522

509-
batch = MessageBatch(tp, builder, self._batch_ttl)
523+
batch = MessageBatch(
524+
tp, builder, self._batch_ttl, self._linger_time, self._batch_size
525+
)
510526
self._batches[tp].append(batch)
511527
if not self._wait_data_future.done():
512528
self._wait_data_future.set_result(None)

aiokafka/producer/producer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,13 +309,13 @@ def __init__(
309309
self._request_timeout_ms / 1000,
310310
txn_manager=self._txn_manager,
311311
loop=loop,
312+
linger_ms=linger_ms,
312313
)
313314
self._sender = Sender(
314315
self.client,
315316
acks=acks,
316317
txn_manager=self._txn_manager,
317318
retry_backoff_ms=retry_backoff_ms,
318-
linger_ms=linger_ms,
319319
message_accumulator=self._message_accumulator,
320320
request_timeout_ms=request_timeout_ms,
321321
)

aiokafka/producer/sender.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import asyncio
22
import collections
33
import logging
4-
import time
54

65
import aiokafka.errors as Errors
76
from aiokafka.client import ConnectionGroup, CoordinationType
@@ -55,7 +54,6 @@ def __init__(
5554
txn_manager,
5655
message_accumulator,
5756
retry_backoff_ms,
58-
linger_ms,
5957
request_timeout_ms,
6058
):
6159
self.client = client
@@ -69,7 +67,6 @@ def __init__(
6967
self._coordinators = {}
7068
self._retry_backoff = retry_backoff_ms / 1000
7169
self._request_timeout_ms = request_timeout_ms
72-
self._linger_time = linger_ms / 1000
7370

7471
async def start(self):
7572
# If producer is idempotent we need to assure we have PID found
@@ -286,17 +283,9 @@ async def _send_produce_req(self, node_id, batches):
286283
node_id (int): kafka broker identifier
287284
batches (dict): dictionary of {TopicPartition: MessageBatch}
288285
"""
289-
t0 = time.monotonic()
290-
291286
handler = SendProduceReqHandler(self, batches)
292287
await handler.do(node_id)
293288

294-
# if batches for node is processed in less than a linger seconds
295-
# then waiting for the remaining time
296-
sleep_time = self._linger_time - (time.monotonic() - t0)
297-
if sleep_time > 0:
298-
await asyncio.sleep(sleep_time)
299-
300289
self._in_flight.remove(node_id)
301290
for tp in batches:
302291
self._muted_partitions.remove(tp)

tests/test_message_accumulator.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,40 @@ def mocked_leader_for_partition(tp):
102102
m_set1 = batches[1].get(tp1)
103103
self.assertEqual(m_set1._builder._relative_offset, 1)
104104

105+
@run_until_complete
106+
async def test_batch_ready(self):
107+
tp0 = TopicPartition("test-topic", 0)
108+
tp1 = TopicPartition("test-topic", 1)
109+
110+
def mocked_leader_for_partition(tp):
111+
if tp == tp0:
112+
return 0
113+
return None
114+
115+
cluster = ClusterMetadata(metadata_max_age_ms=10000)
116+
cluster.leader_for_partition = mock.MagicMock()
117+
cluster.leader_for_partition.side_effect = mocked_leader_for_partition
118+
119+
ma = MessageAccumulator(
120+
cluster, compression_type=0, batch_size=3, batch_ttl=10, linger_ms=1000
121+
)
122+
await ma.add_message(tp0, None, b"hello", timeout=2)
123+
await ma.add_message(tp1, None, b"hello", timeout=2)
124+
125+
batches, _ = ma.drain_by_nodes(ignore_nodes=[])
126+
# it should not be ready yet (linger time)
127+
self.assertEqual(len(batches), 0)
128+
await ma.add_message(tp0, None, b"hello", timeout=2)
129+
await ma.add_message(tp0, None, b"hello", timeout=2)
130+
batches, _ = ma.drain_by_nodes(ignore_nodes=[])
131+
# it should be ready (max size reached)
132+
self.assertEqual(len(batches), 1)
133+
self.assertEqual(batches[0][tp0].ready(), True)
134+
await asyncio.sleep(1)
135+
# it should be ready (linger time reached)
136+
self.assertEqual(len(batches), 1)
137+
self.assertEqual(batches[0][tp1].ready(), True)
138+
105139
@run_until_complete
106140
async def test_batch_done(self):
107141
tp0 = TopicPartition("test-topic", 0)

tests/test_sender.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ async def _setup_sender(self, no_init=False):
7373
txn_manager=tm,
7474
message_accumulator=ma,
7575
retry_backoff_ms=100,
76-
linger_ms=0,
7776
request_timeout_ms=40000,
7877
)
7978
self.add_cleanup(sender.close)

0 commit comments

Comments
 (0)