2525
2626#include " ../program.hpp"
2727#include " ../ur_interface_loader.hpp"
28+ #include " ur.hpp"
2829#include " ur_api.h"
2930#include " ze_api.h"
3031#include < cstddef>
@@ -162,6 +163,19 @@ ur_queue_batched_t::onEventWaitListUse(ur_event_generation_t batch_generation) {
162163 }
163164}
164165
166+ ur_result_t ur_queue_batched_t::markIssuedCommandInBatch (
167+ locked<batch_manager> &batchLocked) {
168+ if (batchLocked->isLimitOfEnqueuedCommandsReached ()) {
169+ UR_CALL (queueFinishUnlocked (batchLocked));
170+
171+ batchLocked->markFirstIssuedCommand ();
172+ } else {
173+ batchLocked->markNextIssuedCommand ();
174+ }
175+
176+ return UR_RESULT_SUCCESS;
177+ }
178+
165179ur_result_t ur_queue_batched_t::enqueueKernelLaunch (
166180 ur_kernel_handle_t hKernel, uint32_t workDim,
167181 const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
@@ -176,7 +190,7 @@ ur_result_t ur_queue_batched_t::enqueueKernelLaunch(
176190 TRACK_SCOPE_LATENCY (" ur_queue_batched_t::enqueueKernelLaunch" );
177191 auto currentRegular = currentCmdLists.lock ();
178192
179- currentRegular-> markIssuedCommand ( );
193+ markIssuedCommandInBatch (currentRegular );
180194
181195 UR_CALL (currentRegular->getActiveBatch ().appendKernelLaunch (
182196 hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
@@ -272,7 +286,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferRead(
272286
273287 auto lockedBatches = currentCmdLists.lock ();
274288
275- lockedBatches-> markIssuedCommand ( );
289+ markIssuedCommandInBatch (lockedBatches );
276290
277291 UR_CALL (lockedBatches->getActiveBatch ().appendMemBufferRead (
278292 hBuffer, false , offset, size, pDst, waitListView,
@@ -300,7 +314,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferWrite(
300314
301315 auto lockedBatches = currentCmdLists.lock ();
302316
303- lockedBatches-> markIssuedCommand ( );
317+ markIssuedCommandInBatch (lockedBatches );
304318
305319 UR_CALL (lockedBatches->getActiveBatch ().appendMemBufferWrite (
306320 hBuffer, false , offset, size, pSrc, waitListView,
@@ -325,7 +339,7 @@ ur_result_t ur_queue_batched_t::enqueueDeviceGlobalVariableWrite(
325339
326340 auto lockedBatch = currentCmdLists.lock ();
327341
328- lockedBatch-> markIssuedCommand ( );
342+ markIssuedCommandInBatch (lockedBatch );
329343
330344 UR_CALL (lockedBatch->getActiveBatch ().appendDeviceGlobalVariableWrite (
331345 hProgram, name, false , count, offset, pSrc, waitListView,
@@ -347,7 +361,7 @@ ur_result_t ur_queue_batched_t::enqueueDeviceGlobalVariableRead(
347361
348362 auto lockedBatch = currentCmdLists.lock ();
349363
350- lockedBatch-> markIssuedCommand ( );
364+ markIssuedCommandInBatch (lockedBatch );
351365
352366 UR_CALL (lockedBatch->getActiveBatch ().appendDeviceGlobalVariableRead (
353367 hProgram, name, false , count, offset, pDst, waitListView,
@@ -371,7 +385,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferFill(
371385
372386 auto lockedBatch = currentCmdLists.lock ();
373387
374- lockedBatch-> markIssuedCommand ( );
388+ markIssuedCommandInBatch (lockedBatch );
375389
376390 return lockedBatch->getActiveBatch ().appendMemBufferFill (
377391 hBuffer, pPattern, patternSize, offset, size, waitListView,
@@ -390,7 +404,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMMemcpy(
390404 wait_list_view (phEventWaitList, numEventsInWaitList, this );
391405 auto lockedBatch = currentCmdLists.lock ();
392406
393- lockedBatch-> markIssuedCommand ( );
407+ markIssuedCommandInBatch (lockedBatch );
394408
395409 UR_CALL (lockedBatch->getActiveBatch ().appendUSMMemcpy (
396410 false , pDst, pSrc, size, waitListView,
@@ -411,7 +425,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMFreeExp(
411425 wait_list_view (phEventWaitList, numEventsInWaitList, this );
412426 auto lockedBatch = currentCmdLists.lock ();
413427
414- lockedBatch-> markIssuedCommand ( );
428+ markIssuedCommandInBatch (lockedBatch );
415429
416430 UR_CALL (lockedBatch->getActiveBatch ().appendUSMFreeExp (
417431 this , pPool, pMem, waitListView,
@@ -431,7 +445,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferMap(
431445 wait_list_view (phEventWaitList, numEventsInWaitList, this );
432446 auto lockedBatch = currentCmdLists.lock ();
433447
434- lockedBatch-> markIssuedCommand ( );
448+ markIssuedCommandInBatch (lockedBatch );
435449
436450 UR_CALL (lockedBatch->getActiveBatch ().appendMemBufferMap (
437451 hBuffer, false , mapFlags, offset, size, waitListView,
@@ -453,7 +467,7 @@ ur_result_t ur_queue_batched_t::enqueueMemUnmap(
453467 wait_list_view (phEventWaitList, numEventsInWaitList, this );
454468 auto lockedBatch = currentCmdLists.lock ();
455469
456- lockedBatch-> markIssuedCommand ( );
470+ markIssuedCommandInBatch (lockedBatch );
457471
458472 return lockedBatch->getActiveBatch ().appendMemUnmap (
459473 hMem, pMappedPtr, waitListView,
@@ -471,7 +485,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferReadRect(
471485 wait_list_view (phEventWaitList, numEventsInWaitList, this );
472486 auto lockedBatch = currentCmdLists.lock ();
473487
474- lockedBatch-> markIssuedCommand ( );
488+ markIssuedCommandInBatch (lockedBatch );
475489
476490 UR_CALL (lockedBatch->getActiveBatch ().appendMemBufferReadRect (
477491 hBuffer, false , bufferOrigin, hostOrigin, region, bufferRowPitch,
@@ -497,7 +511,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferWriteRect(
497511 wait_list_view (phEventWaitList, numEventsInWaitList, this );
498512 auto lockedBatch = currentCmdLists.lock ();
499513
500- lockedBatch-> markIssuedCommand ( );
514+ markIssuedCommandInBatch (lockedBatch );
501515
502516 UR_CALL (lockedBatch->getActiveBatch ().appendMemBufferWriteRect (
503517 hBuffer, false , bufferOrigin, hostOrigin, region, bufferRowPitch,
@@ -519,7 +533,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMAdvise(const void *pMem, size_t size,
519533
520534 auto lockedBatch = currentCmdLists.lock ();
521535
522- lockedBatch-> markIssuedCommand ( );
536+ markIssuedCommandInBatch (lockedBatch );
523537
524538 return lockedBatch->getActiveBatch ().appendUSMAdvise (
525539 pMem, size, advice, emptyWaitList,
@@ -535,7 +549,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMMemcpy2D(
535549 wait_list_view (phEventWaitList, numEventsInWaitList, this );
536550 auto lockedBatch = currentCmdLists.lock ();
537551
538- lockedBatch-> markIssuedCommand ( );
552+ markIssuedCommandInBatch (lockedBatch );
539553
540554 UR_CALL (lockedBatch->getActiveBatch ().appendUSMMemcpy2D (
541555 false , pDst, dstPitch, pSrc, srcPitch, width, height, waitListView,
@@ -557,7 +571,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMFill2D(
557571 wait_list_view (phEventWaitList, numEventsInWaitList, this );
558572 auto lockedBatch = currentCmdLists.lock ();
559573
560- lockedBatch-> markIssuedCommand ( );
574+ markIssuedCommandInBatch (lockedBatch );
561575
562576 return lockedBatch->getActiveBatch ().appendUSMFill2D (
563577 pMem, pitch, patternSize, pPattern, width, height, waitListView,
@@ -573,7 +587,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMPrefetch(
573587 wait_list_view (phEventWaitList, numEventsInWaitList, this );
574588 auto lockedBatch = currentCmdLists.lock ();
575589
576- lockedBatch-> markIssuedCommand ( );
590+ markIssuedCommandInBatch (lockedBatch );
577591
578592 return lockedBatch->getActiveBatch ().appendUSMPrefetch (
579593 pMem, size, flags, waitListView,
@@ -592,7 +606,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferCopyRect(
592606 wait_list_view (phEventWaitList, numEventsInWaitList, this );
593607 auto lockedBatch = currentCmdLists.lock ();
594608
595- lockedBatch-> markIssuedCommand ( );
609+ markIssuedCommandInBatch (lockedBatch );
596610
597611 return lockedBatch->getActiveBatch ().appendMemBufferCopyRect (
598612 hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch,
@@ -608,7 +622,7 @@ ur_result_t ur_queue_batched_t::enqueueEventsWaitWithBarrier(
608622 wait_list_view (phEventWaitList, numEventsInWaitList, this );
609623 auto lockedBatch = currentCmdLists.lock ();
610624
611- lockedBatch-> markIssuedCommand ( );
625+ markIssuedCommandInBatch (lockedBatch );
612626
613627 if ((flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0 ) {
614628 UR_CALL (lockedBatch->getActiveBatch ().appendEventsWaitWithBarrier (
@@ -632,7 +646,7 @@ ur_queue_batched_t::enqueueEventsWait(uint32_t numEventsInWaitList,
632646
633647 auto lockedBatch = currentCmdLists.lock ();
634648
635- lockedBatch-> markIssuedCommand ( );
649+ markIssuedCommandInBatch (lockedBatch );
636650
637651 UR_CALL (lockedBatch->getActiveBatch ().appendEventsWait (
638652 waitListView, createEventIfRequestedRegular (
@@ -650,7 +664,7 @@ ur_result_t ur_queue_batched_t::enqueueMemBufferCopy(
650664
651665 auto lockedBatch = currentCmdLists.lock ();
652666
653- lockedBatch-> markIssuedCommand ( );
667+ markIssuedCommandInBatch (lockedBatch );
654668
655669 return lockedBatch->getActiveBatch ().appendMemBufferCopy (
656670 hBufferSrc, hBufferDst, srcOffset, dstOffset, size, waitListView,
@@ -667,7 +681,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMFill(
667681
668682 auto lockedBatch = currentCmdLists.lock ();
669683
670- lockedBatch-> markIssuedCommand ( );
684+ markIssuedCommandInBatch (lockedBatch );
671685
672686 return lockedBatch->getActiveBatch ().appendUSMFill (
673687 pMem, patternSize, pPattern, size, waitListView,
@@ -685,7 +699,7 @@ ur_result_t ur_queue_batched_t::enqueueMemImageRead(
685699
686700 auto lockedBatch = currentCmdLists.lock ();
687701
688- lockedBatch-> markIssuedCommand ( );
702+ markIssuedCommandInBatch (lockedBatch );
689703
690704 UR_CALL (lockedBatch->getActiveBatch ().appendMemImageRead (
691705 hImage, false , origin, region, rowPitch, slicePitch, pDst, waitListView,
@@ -709,7 +723,7 @@ ur_result_t ur_queue_batched_t::enqueueMemImageWrite(
709723
710724 auto lockedBatch = currentCmdLists.lock ();
711725
712- lockedBatch-> markIssuedCommand ( );
726+ markIssuedCommandInBatch (lockedBatch );
713727
714728 UR_CALL (lockedBatch->getActiveBatch ().appendMemImageWrite (
715729 hImage, false , origin, region, rowPitch, slicePitch, pSrc, waitListView,
@@ -732,7 +746,7 @@ ur_result_t ur_queue_batched_t::enqueueMemImageCopy(
732746
733747 auto lockedBatch = currentCmdLists.lock ();
734748
735- lockedBatch-> markIssuedCommand ( );
749+ markIssuedCommandInBatch (lockedBatch );
736750
737751 return lockedBatch->getActiveBatch ().appendMemImageCopy (
738752 hImageSrc, hImageDst, srcOrigin, dstOrigin, region, waitListView,
@@ -749,7 +763,7 @@ ur_result_t ur_queue_batched_t::enqueueReadHostPipe(
749763
750764 auto lockedBatch = currentCmdLists.lock ();
751765
752- lockedBatch-> markIssuedCommand ( );
766+ markIssuedCommandInBatch (lockedBatch );
753767
754768 UR_CALL (lockedBatch->getActiveBatch ().appendReadHostPipe (
755769 hProgram, pipe_symbol, false , pDst, size, waitListView,
@@ -772,7 +786,7 @@ ur_result_t ur_queue_batched_t::enqueueWriteHostPipe(
772786
773787 auto lockedBatch = currentCmdLists.lock ();
774788
775- lockedBatch-> markIssuedCommand ( );
789+ markIssuedCommandInBatch (lockedBatch );
776790
777791 UR_CALL (lockedBatch->getActiveBatch ().appendWriteHostPipe (
778792 hProgram, pipe_symbol, false , pSrc, size, waitListView,
@@ -796,7 +810,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMDeviceAllocExp(
796810
797811 auto lockedBatch = currentCmdLists.lock ();
798812
799- lockedBatch-> markIssuedCommand ( );
813+ markIssuedCommandInBatch (lockedBatch );
800814
801815 UR_CALL (lockedBatch->getActiveBatch ().appendUSMAllocHelper (
802816 this , pPool, size, pProperties, waitListView, ppMem,
@@ -818,7 +832,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMSharedAllocExp(
818832
819833 auto lockedBatch = currentCmdLists.lock ();
820834
821- lockedBatch-> markIssuedCommand ( );
835+ markIssuedCommandInBatch (lockedBatch );
822836
823837 UR_CALL (lockedBatch->getActiveBatch ().appendUSMAllocHelper (
824838 this , pPool, size, pProperties, waitListView, ppMem,
@@ -839,7 +853,7 @@ ur_result_t ur_queue_batched_t::enqueueUSMHostAllocExp(
839853
840854 auto lockedBatch = currentCmdLists.lock ();
841855
842- lockedBatch-> markIssuedCommand ( );
856+ markIssuedCommandInBatch (lockedBatch );
843857
844858 UR_CALL (lockedBatch->getActiveBatch ().appendUSMAllocHelper (
845859 this , pPool, size, pProperties, waitListView, ppMem,
@@ -866,7 +880,7 @@ ur_result_t ur_queue_batched_t::bindlessImagesImageCopyExp(
866880
867881 auto lockedBatch = currentCmdLists.lock ();
868882
869- lockedBatch-> markIssuedCommand ( );
883+ markIssuedCommandInBatch (lockedBatch );
870884
871885 return lockedBatch->getActiveBatch ().bindlessImagesImageCopyExp (
872886 pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat,
@@ -885,7 +899,7 @@ ur_result_t ur_queue_batched_t::bindlessImagesWaitExternalSemaphoreExp(
885899
886900 auto lockedBatch = currentCmdLists.lock ();
887901
888- lockedBatch-> markIssuedCommand ( );
902+ markIssuedCommandInBatch (lockedBatch );
889903
890904 return lockedBatch->getActiveBatch ().bindlessImagesWaitExternalSemaphoreExp (
891905 hSemaphore, hasWaitValue, waitValue, waitListView,
@@ -902,7 +916,7 @@ ur_result_t ur_queue_batched_t::bindlessImagesSignalExternalSemaphoreExp(
902916
903917 auto lockedBatch = currentCmdLists.lock ();
904918
905- lockedBatch-> markIssuedCommand ( );
919+ markIssuedCommandInBatch (lockedBatch );
906920
907921 return lockedBatch->getActiveBatch ().bindlessImagesSignalExternalSemaphoreExp (
908922 hSemaphore, hasSignalValue, signalValue, waitListView,
@@ -934,7 +948,7 @@ ur_result_t ur_queue_batched_t::enqueueTimestampRecordingExp(
934948
935949 // auto lockedBatch = currentCmdLists.lock();
936950
937- // lockedBatch->markIssuedCommand ();
951+ // lockedBatch->markNextIssuedCommand ();
938952
939953 // UR_CALL(lockedBatch->getActiveBatch().appendTimestampRecordingExp(
940954 // false, waitListView,
@@ -981,7 +995,7 @@ ur_result_t ur_queue_batched_t::enqueueNativeCommandExp(
981995
982996 auto lockedBatch = currentCmdLists.lock ();
983997
984- lockedBatch-> markIssuedCommand ( );
998+ markIssuedCommandInBatch (lockedBatch );
985999
9861000 return lockedBatch->getActiveBatch ().appendNativeCommandExp (
9871001 pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties,
@@ -1004,7 +1018,7 @@ ur_result_t ur_queue_batched_t::enqueueKernelLaunchWithArgsExp(
10041018
10051019 auto lockedBatch = currentCmdLists.lock ();
10061020
1007- lockedBatch-> markIssuedCommand ( );
1021+ markIssuedCommandInBatch (lockedBatch );
10081022
10091023 return lockedBatch->getActiveBatch ().appendKernelLaunchWithArgsExp (
10101024 hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
0 commit comments