@@ -43,7 +43,7 @@ def process_file(agent: Agent):
4343 name = "streaming_parent_task" , track_started = True , bind = True , serializer = "pickle"
4444)
4545def streaming_parent_task (
46- self , agent : Agent , result_handler : ResultHandler , batch_size : int = 2
46+ self , agent : Agent , result_handler : ResultHandler , batch_size : int = 10
4747):
4848 """
4949 This task is used to launch the two tasks that are doing the real work, so that
@@ -155,30 +155,30 @@ async def async_process_streaming_output(
155155
156156 input_job_running = True
157157
158+ data = await consumer .getmany (timeout_ms = 3000 , max_records = batch_size )
159+
158160 while input_job_running :
159- try :
160- data = await consumer .getmany (timeout_ms = 3000 , max_records = batch_size )
161- for tp , messages in data .items ():
162- if messages :
163- logger .debug (f"Handling { messages = } in topic { tp .topic } " )
164- data = [msg .value for msg in messages ]
165- result_handler (data )
166- logger .debug (
167- f"Handled { len (messages )} messages in topic { tp .topic } "
168- )
169- else :
170- logger .debug (f"No messages in topic { tp .topic } " )
171-
172- if not data :
173- logger .info (f"No messages in any topic" )
174- finally :
175- job = process_file_streaming .AsyncResult (input_job_id )
176- # TODO no way to recover here if connection to main app is lost, job will be stuck at "PENDING" so this will loop forever
177- if job .status in ["SUCCESS" , "FAILURE" , "REVOKED" ]:
178- input_job_running = False
179- logger .info (f"Input job done, stopping output job" )
161+ for tp , messages in data .items ():
162+ if messages :
163+ logger .debug (f"Handling { messages = } in topic { tp .topic } " )
164+ data = [msg .value for msg in messages ]
165+ result_handler (data )
166+ logger .debug (f"Handled { len (messages )} messages in topic { tp .topic } " )
180167 else :
181- logger .info (f"Input job still running, keeping output job running" )
168+ logger .debug (f"No messages in topic { tp .topic } " )
169+
170+ if not data :
171+ logger .info (f"No messages in any topic" )
172+
173+ job = process_file_streaming .AsyncResult (input_job_id )
174+ # we are getting packets from the output topic here to check if its empty and continue processing if its not
175+ data = await consumer .getmany (timeout_ms = 3000 , max_records = batch_size )
176+ # TODO no way to recover here if connection to main app is lost, job will be stuck at "PENDING" so this will loop forever
177+ if job .status in ["SUCCESS" , "FAILURE" , "REVOKED" ] and len (data .items ()) == 0 :
178+ input_job_running = False
179+ logger .info (f"Input job done, stopping output job" )
180+ else :
181+ logger .info (f"Input job still running, keeping output job running" )
182182
183183 await consumer .stop ()
184184
0 commit comments