@@ -585,9 +585,18 @@ void HitProcessingLoop(HitProcessingContext *const context, GPUstate &gpuState,
585
585
std::unique_lock lock (context->mutex );
586
586
context->cv .wait (lock);
587
587
588
+ // Optional timing of the hit transfer and processing below (disabled; uncomment to enable)
589
+ // auto start = std::chrono::high_resolution_clock::now();
588
590
gpuState.fHitScoring ->TransferHitsToHost (context->hitTransferStream );
589
591
const bool haveNewHits = gpuState.fHitScoring ->ProcessHits ();
590
592
593
+ // auto end = std::chrono::high_resolution_clock::now();
594
+ // std::chrono::duration<double> elapsed = end - start;
595
+
596
+ // if (haveNewHits) {
597
+ // std::cout << "HIT Processing time: " << elapsed.count() << " seconds" << std::endl;
598
+ // }
599
+
591
600
if (haveNewHits) {
592
601
AdvanceEventStates (EventState::FlushingHits, EventState::HitsFlushed, eventStates);
593
602
cvG4Workers.notify_all ();
@@ -597,7 +606,7 @@ void HitProcessingLoop(HitProcessingContext *const context, GPUstate &gpuState,
597
606
598
607
void TransportLoop (int trackCapacity, int scoringCapacity, int numThreads, TrackBuffer &trackBuffer, GPUstate &gpuState,
599
608
std::vector<std::atomic<EventState>> &eventStates, std::condition_variable &cvG4Workers,
600
- std::vector<AdePTScoring> &scoring, int adeptSeed)
609
+ std::vector<AdePTScoring> &scoring, int adeptSeed, int debugLevel )
601
610
{
602
611
// NVTXTracer tracer{"TransportLoop"};
603
612
@@ -661,10 +670,9 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
661
670
std::this_thread::sleep_for (10ms);
662
671
}
663
672
664
- // TODO: Pass debug level here
665
- // if (fDebugLevel > 2) {
666
- // G4cout << "GPU transport starting" << std::endl;
667
- // }
673
+ if (debugLevel > 2 ) {
674
+ G4cout << " GPU transport starting" << std::endl;
675
+ }
668
676
669
677
COPCORE_CUDA_CHECK (cudaStreamSynchronize (gpuState.stream ));
670
678
@@ -727,8 +735,7 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
727
735
const auto nInject = std::min (toDevice.nTrack .load (), toDevice.maxTracks );
728
736
toDevice.nTrack = 0 ;
729
737
730
- // TODO: Pass debug level here
731
- // if (fDebugLevel > 3) std::cout << "Injecting " << nInject << " to GPU\n";
738
+ if (debugLevel > 3 ) std::cout << " Injecting " << nInject << " to GPU\n " ;
732
739
733
740
// copy buffer of tracks to device
734
741
COPCORE_CUDA_CHECK (cudaMemcpyAsync (trackBuffer.toDevice_dev .get (), toDevice.tracks ,
@@ -963,8 +970,10 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
963
970
hitProcessing->cv .notify_one ();
964
971
} else {
965
972
if (gpuState.stats ->hitBufferOccupancy >= gpuState.fHitScoring ->HitCapacity () / 2 ||
966
- std::any_of (eventStates.begin (), eventStates.end (),
967
- [](const auto &state) { return state == EventState::RequestHitFlush; })) {
973
+ gpuState.stats ->hitBufferOccupancy >= 10000 ||
974
+ std::any_of (eventStates.begin (), eventStates.end (), [](const auto &state) {
975
+ return state.load (std::memory_order_acquire) == EventState::RequestHitFlush;
976
+ })) {
968
977
AdvanceEventStates (EventState::RequestHitFlush, EventState::FlushingHits, eventStates);
969
978
gpuState.fHitScoring ->SwapDeviceBuffers (gpuState.stream );
970
979
hitProcessing->cv .notify_one ();
@@ -978,10 +987,7 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
978
987
cvG4Workers.notify_all ();
979
988
}
980
989
981
- // TODO: get fDebugLevel correctly and put prints back in.
982
- int fDebugLevel = 0 ;
983
- int fNThread = numThreads;
984
- if (fDebugLevel >= 3 && inFlight > 0 || (fDebugLevel >= 2 && iteration % 500 == 0 )) {
990
+ if (debugLevel >= 3 && inFlight > 0 || (debugLevel >= 2 && iteration % 500 == 0 )) {
985
991
std::cerr << inFlight << " in flight " ;
986
992
std::cerr << " (" << gpuState.stats ->inFlight [ParticleType::Electron] << " "
987
993
<< gpuState.stats ->inFlight [ParticleType::Positron] << " "
@@ -992,10 +998,11 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
992
998
std::cerr << " \t slots:" << gpuState.stats ->slotFillLevel << " , " << numLeaked << " leaked."
993
999
<< " \t InjectState: " << static_cast <unsigned int >(gpuState.injectState .load ())
994
1000
<< " \t ExtractState: " << static_cast <unsigned int >(gpuState.extractState .load ())
995
- << " \t HitBuffer: " << gpuState.stats ->hitBufferOccupancy ;
996
- if (fDebugLevel >= 4 ) {
1001
+ << " \t HitBuffer: " << gpuState.stats ->hitBufferOccupancy
1002
+ << " \t HitBufferReadyToSwap: " << gpuState.fHitScoring ->ReadyToSwapBuffers ();
1003
+ if (debugLevel >= 4 ) {
997
1004
std::cerr << " \n\t per event: " ;
998
- for (unsigned int i = 0 ; i < fNThread ; ++i) {
1005
+ for (unsigned int i = 0 ; i < numThreads ; ++i) {
999
1006
std::cerr << i << " : " << gpuState.stats ->perEventInFlight [i]
1000
1007
<< " (s=" << static_cast <unsigned short >(eventStates[i].load (std::memory_order_acquire)) << " )\t " ;
1001
1008
}
@@ -1041,8 +1048,7 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
1041
1048
ClearAllQueues<<<1 , 1 , 0 , gpuState.stream>>> (queues);
1042
1049
COPCORE_CUDA_CHECK (cudaStreamSynchronize (gpuState.stream ));
1043
1050
1044
- // TODO
1045
- // if (fDebugLevel > 2) std::cout << "End transport loop.\n";
1051
+ if (debugLevel > 2 ) std::cout << " End transport loop.\n " ;
1046
1052
}
1047
1053
1048
1054
hitProcessing->keepRunning = false ;
@@ -1063,11 +1069,13 @@ std::shared_ptr<const std::vector<GPUHit>> GetGPUHits(unsigned int threadId, GPU
1063
1069
// separate init function that will compile here and be called from the .icc
1064
1070
// Starts the GPU worker: constructs and returns a std::thread whose entry point is TransportLoop.
// trackCapacity, scoringCapacity, numThreads, adeptSeed and debugLevel are copied into the thread.
// trackBuffer, gpuState, eventStates, cvG4Workers and scoring are wrapped in std::ref, so the thread
// works on the caller's objects; they must stay alive for as long as the thread runs.
// debugLevel is forwarded unchanged to TransportLoop, which uses it to gate its diagnostic printouts.
// NOTE(review): the returned thread is joinable; presumably the caller joins it at shutdown - confirm.
std::thread LaunchGPUWorker (int trackCapacity, int scoringCapacity, int numThreads, TrackBuffer &trackBuffer,
1065
1071
GPUstate &gpuState, std::vector<std::atomic<EventState>> &eventStates,
1066
- std::condition_variable &cvG4Workers, std::vector<AdePTScoring> &scoring, int adeptSeed)
1072
+ std::condition_variable &cvG4Workers, std::vector<AdePTScoring> &scoring, int adeptSeed,
1073
+ int debugLevel)
1067
1074
{
// The argument order below must match TransportLoop's parameter list; std::thread copies its
// arguments, so std::ref is needed for TransportLoop's reference parameters.
1068
- return std::thread{&TransportLoop, trackCapacity, scoringCapacity, numThreads,
1069
- std::ref (trackBuffer), std::ref (gpuState), std::ref (eventStates), std::ref (cvG4Workers),
1070
- std::ref (scoring), adeptSeed};
1075
+ return std::thread{
1076
+ &TransportLoop, trackCapacity, scoringCapacity, numThreads, std::ref (trackBuffer),
1077
+ std::ref (gpuState), std::ref (eventStates), std::ref (cvG4Workers), std::ref (scoring), adeptSeed,
1078
+ debugLevel};
1071
1079
}
1072
1080
1073
1081
void FreeGPU (std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter> &gpuState, G4HepEmState &g4hepem_state,
0 commit comments