enable calling of UserSteppingAction and PostUserTrackingAction (apt-sim#356)

SeverinDiederichs · web-flow · commit f1d2bb7e09ac · 2025-03-10T21:25:18.000+01:00
If the UserSteppingAction is required, we need to copy every GPU step back to the G4 workers. This required changing the kernels, as we need to be able to record every step regardless of edep or sensitive detector. Since copying back every step can lead to a very large number of steps, this would quickly fill the buffer. Then, if the buffer gets too full before the Geant4 workers take care of their hits, the GPUStep Management Thread would start copying out the GPUSteps, as implemented in apt-sim#350. However, this copying is too slow if every step is recorded, and it leads to the GPU running out of HitSlots. Previously, the Geant4 workers would only take care of the GPU steps after their transport had finished. However, this may be too late: the buffer may already be too full, which triggers the copying. Therefore, the Geant4 workers must be able to process some of the steps earlier. This is now done in the `AdePTTrackingManager`: before a new track is processed, `ProcessGPUSteps` is called. This way, the GPU step buffer can be kept under control. To enable this, the processing of the GPUSteps is now encapsulated in a single function that can be called from the `AdePTTrackingManager`. In the same manner, the PostUserTrackingAction is called. For this, RecordHit also records whether the step is the last step of a track. Note that it is straightforward to also call the PreUserTrackingAction. This requires a StepCounter, which is available in the B field update branch, so I will add it *after* the B field branch is merged. Both can be enabled via: ``` /adept/CallUserSteppingAction true /adept/CallPostUserTrackingAction true ``` Since this PR touches the kernels, the physics validation at high statistics is shown below; it is as good as it should be: <img width="586" alt="Screenshot 2025-03-09 at 07 30 15" src="https://github.com/user-attachments/assets/8027a386-2680-4b22-9c4f-8da91a693ea3" />
diff --git a/include/AdePT/core/AdePTConfiguration.hh b/include/AdePT/core/AdePTConfiguration.hh
@@ -29,6 +29,11 @@ public:
   ~AdePTConfiguration() {}
   void SetNumThreads(int numThreads) { fNumThreads = numThreads; }
   void SetTrackInAllRegions(bool trackInAllRegions) { fTrackInAllRegions = trackInAllRegions; }
+  void SetCallUserSteppingAction(bool callUserSteppingAction) { fCallUserSteppingAction = callUserSteppingAction; }
+  void SetCallPostUserTrackingAction(bool callPostUserTrackingAction)
+  {
+    fCallPostUserTrackingAction = callPostUserTrackingAction;
+  }
   void AddGPURegionName(std::string name) { fGPURegionNames.push_back(name); }
   void SetAdePTActivation(bool activateAdePT) { fAdePTActivated = activateAdePT; }
   void SetVerbosity(int verbosity) { fVerbosity = verbosity; };
@@ -43,6 +48,8 @@ public:
   void SetVecGeomGDML(std::string filename) { fVecGeomGDML = filename; }
 
   bool GetTrackInAllRegions() { return fTrackInAllRegions; }
+  bool GetCallUserSteppingAction() { return fCallUserSteppingAction; }
+  bool GetCallPostUserTrackingAction() { return fCallPostUserTrackingAction; }
   bool IsAdePTActivated() { return fAdePTActivated; }
   int GetNumThreads() { return fNumThreads; };
   int GetVerbosity() { return fVerbosity; };
@@ -59,6 +66,8 @@ public:
 
 private:
   bool fTrackInAllRegions{false};
+  bool fCallUserSteppingAction{false};
+  bool fCallPostUserTrackingAction{false};
   bool fAdePTActivated{true};
   int fNumThreads;
   int fVerbosity{0};
diff --git a/include/AdePT/core/AdePTScoringTemplate.cuh b/include/AdePT/core/AdePTScoringTemplate.cuh
@@ -23,7 +23,7 @@ __device__ void RecordHit(Scoring *scoring_dev, int aParentID, char aParticleTyp
                           vecgeom::Vector3D<Precision> const &aPreMomentumDirection, double aPreEKin, double aPreCharge,
                           vecgeom::NavigationState const &aPostState, vecgeom::Vector3D<Precision> const &aPostPosition,
                           vecgeom::Vector3D<Precision> const &aPostMomentumDirection, double aPostEKin,
-                          double aPostCharge, unsigned int eventId, short threadId);
+                          double aPostCharge, unsigned int eventId, short threadId, bool isLastStep);
 
 template <typename Scoring>
 __device__ void AccountProduced(Scoring *scoring_dev, int num_ele, int num_pos, int num_gam);
diff --git a/include/AdePT/core/AdePTTransport.h b/include/AdePT/core/AdePTTransport.h
@@ -78,6 +78,7 @@ class AdePTTransport : public AdePTTransportInterface {
   void Cleanup();
   /// @brief Interface for transporting a buffer of tracks in AdePT.
   void Shower(int event, int threadId);
+  void ProcessGPUSteps(int, int) {};
 
 private:
   static inline G4HepEmState *fg4hepem_state{nullptr}; ///< The HepEm state singleton
@@ -106,7 +107,6 @@ class AdePTTransport : public AdePTTransportInterface {
   bool InitializeField(double bz);
   bool InitializeGeometry(const vecgeom::cxx::VPlacedVolume *world);
   bool InitializePhysics();
-  void ProcessGPUHits();
 };
 
 #include "AdePTTransport.icc"
diff --git a/include/AdePT/core/AdePTTransportInterface.hh b/include/AdePT/core/AdePTTransportInterface.hh
@@ -47,8 +47,9 @@ public:
   /// @brief Initialize the ApplyCuts flag on device
   virtual bool InitializeApplyCuts(bool applycuts) = 0;
   /// @brief Interface for transporting a buffer of tracks in AdePT.
-  virtual void Shower(int event, int threadId) = 0;
-  virtual void Cleanup()                       = 0;
+  virtual void Shower(int event, int threadId)            = 0;
+  virtual void Cleanup()                                  = 0;
+  virtual void ProcessGPUSteps(int threadId, int eventId) = 0;
 };
 
 #endif
diff --git a/include/AdePT/core/AsyncAdePTTransport.cuh b/include/AdePT/core/AsyncAdePTTransport.cuh
@@ -616,7 +616,8 @@ void HitProcessingLoop(HitProcessingContext *const context, GPUstate &gpuState,
 
 void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, TrackBuffer &trackBuffer, GPUstate &gpuState,
                    std::vector<std::atomic<EventState>> &eventStates, std::condition_variable &cvG4Workers,
-                   std::vector<AdePTScoring> &scoring, int adeptSeed, int debugLevel)
+                   std::vector<AdePTScoring> &scoring, int adeptSeed, int debugLevel, bool returnAllSteps,
+                   bool returnLastStep)
 {
   // NVTXTracer tracer{"TransportLoop"};
 
@@ -791,7 +792,7 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
         const auto [threads, blocks] = computeThreadsAndBlocks(particlesInFlight[ParticleType::Electron]);
         TransportElectrons<PerEventScoring><<<blocks, threads, 0, electrons.stream>>>(
             electrons.tracks, electrons.queues.currentlyActive, secondaries, electrons.queues.nextActive,
-            electrons.queues.leakedTracksCurrent, gpuState.fScoring_dev);
+            electrons.queues.leakedTracksCurrent, gpuState.fScoring_dev, returnAllSteps, returnLastStep);
 
         COPCORE_CUDA_CHECK(cudaEventRecord(electrons.event, electrons.stream));
         COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, electrons.event, 0));
@@ -802,7 +803,7 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
         const auto [threads, blocks] = computeThreadsAndBlocks(particlesInFlight[ParticleType::Positron]);
         TransportPositrons<PerEventScoring><<<blocks, threads, 0, positrons.stream>>>(
             positrons.tracks, positrons.queues.currentlyActive, secondaries, positrons.queues.nextActive,
-            positrons.queues.leakedTracksCurrent, gpuState.fScoring_dev);
+            positrons.queues.leakedTracksCurrent, gpuState.fScoring_dev, returnAllSteps, returnLastStep);
 
         COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, positrons.stream));
         COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, positrons.event, 0));
@@ -813,7 +814,8 @@ void TransportLoop(int trackCapacity, int scoringCapacity, int numThreads, Track
         const auto [threads, blocks] = computeThreadsAndBlocks(particlesInFlight[ParticleType::Gamma]);
         TransportGammas<PerEventScoring><<<blocks, threads, 0, gammas.stream>>>(
             gammas.tracks, gammas.queues.currentlyActive, secondaries, gammas.queues.nextActive,
-            gammas.queues.leakedTracksCurrent, gpuState.fScoring_dev); //, gpuState.gammaInteractions);
+            gammas.queues.leakedTracksCurrent, gpuState.fScoring_dev, returnAllSteps,
+            returnLastStep); //, gpuState.gammaInteractions);
 
         // constexpr unsigned int intThreads = 128;
         // ApplyGammaInteractions<PerEventScoring><<<dim3(20, 3, 1), intThreads, 0, gammas.stream>>>(
@@ -1099,12 +1101,12 @@ void CloseGPUBuffer(unsigned int threadId, GPUstate &gpuState, GPUHit *begin, co
 std::thread LaunchGPUWorker(int trackCapacity, int scoringCapacity, int numThreads, TrackBuffer &trackBuffer,
                             GPUstate &gpuState, std::vector<std::atomic<EventState>> &eventStates,
                             std::condition_variable &cvG4Workers, std::vector<AdePTScoring> &scoring, int adeptSeed,
-                            int debugLevel)
+                            int debugLevel, bool returnAllSteps, bool returnLastStep)
 {
   return std::thread{
       &TransportLoop,     trackCapacity,         scoringCapacity,       numThreads,        std::ref(trackBuffer),
       std::ref(gpuState), std::ref(eventStates), std::ref(cvG4Workers), std::ref(scoring), adeptSeed,
-      debugLevel};
+      debugLevel,         returnAllSteps,        returnLastStep};
 }
 
 void FreeGPU(std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter> &gpuState, G4HepEmState &g4hepem_state,
diff --git a/include/AdePT/core/AsyncAdePTTransport.hh b/include/AdePT/core/AsyncAdePTTransport.hh
@@ -59,6 +59,9 @@ private:
   std::vector<double> fGPUNetEnergy;
   bool fTrackInAllRegions = false;
   std::vector<std::string> const *fGPURegionNames;
+  // Flags for the kernels to return the last or all steps, needed for PostUserTrackingAction or UserSteppingAction
+  bool fReturnAllSteps = false;
+  bool fReturnLastStep = false;
 
   void Initialize();
   void InitBVH();
@@ -105,6 +108,7 @@ public:
   void Shower(int event, int threadId) override { Flush(threadId, event); }
   /// Block until transport of the given event is done.
   void Flush(int threadId, int eventId);
+  void ProcessGPUSteps(int threadId, int eventId) override;
   void Cleanup() override {}
 };
 
diff --git a/include/AdePT/core/AsyncAdePTTransport.icc b/include/AdePT/core/AsyncAdePTTransport.icc
@@ -48,7 +48,7 @@ std::pair<GPUHit *, GPUHit *> GetGPUHitsFromBuffer(unsigned int, unsigned int, A
 void CloseGPUBuffer(unsigned int, AsyncAdePT::GPUstate &, GPUHit *, const bool);
 std::thread LaunchGPUWorker(int, int, int, AsyncAdePT::TrackBuffer &, AsyncAdePT::GPUstate &,
                             std::vector<std::atomic<AsyncAdePT::EventState>> &, std::condition_variable &,
-                            std::vector<AdePTScoring> &, int, int);
+                            std::vector<AdePTScoring> &, int, int, bool, bool);
 std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter> InitializeGPU(int trackCapacity, int scoringCapacity,
                                                                                  int numThreads,
                                                                                  AsyncAdePT::TrackBuffer &trackBuffer,
@@ -85,7 +85,8 @@ AsyncAdePTTransport<IntegrationLayer>::AsyncAdePTTransport(AdePTConfiguration &c
       fDebugLevel{configuration.GetVerbosity()}, fIntegrationLayerObjects(fNThread), fEventStates(fNThread),
       fGPUNetEnergy(fNThread, 0.0), fTrackInAllRegions{configuration.GetTrackInAllRegions()},
       fGPURegionNames{configuration.GetGPURegionNames()}, fCUDAStackLimit{configuration.GetCUDAStackLimit()},
-      fCUDAHeapLimit{configuration.GetCUDAHeapLimit()}
+      fCUDAHeapLimit{configuration.GetCUDAHeapLimit()}, fReturnAllSteps{configuration.GetCallUserSteppingAction()},
+      fReturnLastStep{configuration.GetCallPostUserTrackingAction()}
 {
   if (fNThread > kMaxThreads)
     throw std::invalid_argument("AsyncAdePTTransport limited to " + std::to_string(kMaxThreads) + " threads");
@@ -244,7 +245,8 @@ void AsyncAdePTTransport<IntegrationLayer>::Initialize()
 
   fGPUstate  = async_adept_impl::InitializeGPU(fTrackCapacity, fScoringCapacity, fNThread, *fBuffer, fScoring);
   fGPUWorker = async_adept_impl::LaunchGPUWorker(fTrackCapacity, fScoringCapacity, fNThread, *fBuffer, *fGPUstate,
-                                                 fEventStates, fCV_G4Workers, fScoring, fAdePTSeed, fDebugLevel);
+                                                 fEventStates, fCV_G4Workers, fScoring, fAdePTSeed, fDebugLevel,
+                                                 fReturnAllSteps, fReturnLastStep);
 }
 
 template <typename IntegrationLayer>
@@ -254,6 +256,33 @@ void AsyncAdePTTransport<IntegrationLayer>::InitBVH()
   vecgeom::cxx::BVHManager::DeviceInit();
 }
 
+template <typename IntegrationLayer>
+void AsyncAdePTTransport<IntegrationLayer>::ProcessGPUSteps(int threadId, int eventId)
+{
+
+  AdePTGeant4Integration &integrationInstance = fIntegrationLayerObjects[threadId];
+  std::pair<GPUHit *, GPUHit *> range;
+  bool dataOnBuffer;
+
+  while ((range = async_adept_impl::GetGPUHitsFromBuffer(threadId, eventId, *fGPUstate, dataOnBuffer)).first !=
+         nullptr) {
+    for (auto it = range.first; it != range.second; ++it) {
+      // important sanity check: thread should only process its own hits and only from the current event
+      if (it->threadId != threadId)
+        std::cerr << "\033[1;31mError, threadId doesn't match it->threadId " << it->threadId << " threadId " << threadId
+                  << "\033[0m" << std::endl;
+      if (it->fEventId != eventId) {
+        std::cerr << "\033[1;31mError, eventId doesn't match it->fEventId " << it->fEventId << "eventId " << eventId
+                  << " num hits to be processed " << (range.second - range.first) << " dataOnBuffer " << dataOnBuffer
+                  << "state : " << static_cast<unsigned int>(fEventStates[threadId].load(std::memory_order_acquire))
+                  << "\033[0m" << std::endl;
+      }
+      integrationInstance.ProcessGPUStep(*it, fReturnAllSteps, fReturnLastStep);
+    }
+    async_adept_impl::CloseGPUBuffer(threadId, *fGPUstate, range.first, dataOnBuffer);
+  }
+}
+
 template <typename IntegrationLayer>
 void AsyncAdePTTransport<IntegrationLayer>::Flush(G4int threadId, G4int eventId)
 {
@@ -268,31 +297,12 @@ void AsyncAdePTTransport<IntegrationLayer>::Flush(G4int threadId, G4int eventId)
 
   while (fEventStates[threadId].load(std::memory_order_acquire) < EventState::DeviceFlushed) {
 
-    std::pair<GPUHit *, GPUHit *> range;
-    bool dataOnBuffer;
-
     {
       std::unique_lock lock{fMutex_G4Workers};
       fCV_G4Workers.wait(lock);
     }
 
-    while ((range = async_adept_impl::GetGPUHitsFromBuffer(threadId, eventId, *fGPUstate, dataOnBuffer)).first !=
-           nullptr) {
-      for (auto it = range.first; it != range.second; ++it) {
-        // important sanity check: thread should only process its own hits and only from the current event
-        if (it->threadId != threadId)
-          std::cerr << "Error, threadId doesn't match it->threadId " << it->threadId << " threadId " << threadId
-                    << std::endl;
-        if (it->fEventId != eventId) {
-          std::cerr << "Error, eventId doesn't match it->fEventId " << it->fEventId << "eventId " << eventId
-                    << " num hits to be processed " << (range.second - range.first) << " dataOnBuffer " << dataOnBuffer
-                    << "state : " << static_cast<unsigned int>(fEventStates[threadId].load(std::memory_order_acquire))
-                    << std::endl;
-        }
-        integrationInstance.ProcessGPUHit(*it);
-      }
-      async_adept_impl::CloseGPUBuffer(threadId, *fGPUstate, range.first, dataOnBuffer);
-    }
+    ProcessGPUSteps(threadId, eventId);
   }
 
   // Now device should be flushed, so retrieve the tracks:
diff --git a/include/AdePT/core/HostScoringImpl.cuh b/include/AdePT/core/HostScoringImpl.cuh
@@ -155,15 +155,15 @@ __device__ void RecordHit(HostScoring *hostScoring_dev, int aParentID, char aPar
                           vecgeom::Vector3D<Precision> const &aPreMomentumDirection, double aPreEKin, double aPreCharge,
                           vecgeom::NavigationState const &aPostState, vecgeom::Vector3D<Precision> const &aPostPosition,
                           vecgeom::Vector3D<Precision> const &aPostMomentumDirection, double aPostEKin,
-                          double aPostCharge, unsigned int, short)
+                          double aPostCharge, unsigned int, short, bool)
 {
   // Acquire a hit slot
   GPUHit &aGPUHit = *GetNextFreeHit(hostScoring_dev);
 
   // Fill the required data
   FillHit(aGPUHit, aParentID, aParticleType, aStepLength, aTotalEnergyDeposit, aPreState, aPrePosition,
           aPreMomentumDirection, aPreEKin, aPreCharge, aPostState, aPostPosition, aPostMomentumDirection, aPostEKin,
-          aPostCharge, 0, 0);
+          aPostCharge, 0, 0, false);
 }
 
 /// @brief Account for the number of produced secondaries
@@ -202,7 +202,7 @@ inline void EndOfIteration(HostScoring &hostScoring, HostScoring *hostScoring_de
     COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream));
     // Process the hits on CPU
     for (const auto &hit : hostScoring) {
-      integration.ProcessGPUHit(hit);
+      integration.ProcessGPUStep(hit);
     }
   }
 }
@@ -220,7 +220,7 @@ inline void EndOfTransport(HostScoring &hostScoring, HostScoring *hostScoring_de
   COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream));
   // Process the last hits on CPU
   for (const auto &hit : hostScoring) {
-    integration.ProcessGPUHit(hit);
+    integration.ProcessGPUStep(hit);
   }
 }
 } // namespace adept_scoring
diff --git a/include/AdePT/core/PerEventScoringImpl.cuh b/include/AdePT/core/PerEventScoringImpl.cuh
@@ -674,15 +674,15 @@ __device__ void RecordHit(AsyncAdePT::PerEventScoring * /*scoring*/, int aParent
                           vecgeom::Vector3D<Precision> const &aPreMomentumDirection, double aPreEKin, double aPreCharge,
                           vecgeom::NavigationState const &aPostState, vecgeom::Vector3D<Precision> const &aPostPosition,
                           vecgeom::Vector3D<Precision> const &aPostMomentumDirection, double aPostEKin,
-                          double aPostCharge, unsigned int eventID, short threadID)
+                          double aPostCharge, unsigned int eventID, short threadID, bool isLastStep)
 {
   // Acquire a hit slot
   GPUHit &aGPUHit = AsyncAdePT::gHitScoringBuffer_dev.GetNextSlot(threadID);
 
   // Fill the required data
   FillHit(aGPUHit, aParentID, aParticleType, aStepLength, aTotalEnergyDeposit, aPreState, aPrePosition,
           aPreMomentumDirection, aPreEKin, aPreCharge, aPostState, aPostPosition, aPostMomentumDirection, aPostEKin,
-          aPostCharge, eventID, threadID);
+          aPostCharge, eventID, threadID, isLastStep);
 }
 
 /// @brief Account for the number of produced secondaries
diff --git a/include/AdePT/core/ScoringCommons.hh b/include/AdePT/core/ScoringCommons.hh
@@ -21,18 +21,18 @@ struct GPUStepPoint {
 // Stores the necessary data to reconstruct GPU hits on the host , and
 // call the user-defined Geant4 sensitive detector code
 struct GPUHit {
-  int fParentID{0}; // Track ID
+  // Data needed to reconstruct pre-post step points
+  GPUStepPoint fPreStepPoint;
+  GPUStepPoint fPostStepPoint;
   // Data needed to reconstruct G4 Step
   double fStepLength{0};
   double fTotalEnergyDeposit{0};
   double fNonIonizingEnergyDeposit{0};
-  // bool fFirstStepInVolume{false};
-  // bool fLastStepInVolume{false};
-  // Data needed to reconstruct pre-post step points
-  GPUStepPoint fPreStepPoint;
-  GPUStepPoint fPostStepPoint;
+  int fParentID{0}; // Track ID
   unsigned int fEventId{0};
   short threadId{-1};
+  // bool fFirstStepInVolume{false};
+  bool fLastStepOfTrack{false};
   char fParticleType{0}; // Particle type ID
 };
 
@@ -74,11 +74,12 @@ __device__ __forceinline__ void FillHit(GPUHit &aGPUHit, int aParentID, char aPa
                                         double aPreCharge, vecgeom::NavigationState const &aPostState,
                                         vecgeom::Vector3D<Precision> const &aPostPosition,
                                         vecgeom::Vector3D<Precision> const &aPostMomentumDirection, double aPostEKin,
-                                        double aPostCharge, unsigned int eventID, short threadID)
+                                        double aPostCharge, unsigned int eventID, short threadID, bool isLastStep)
 {
   aGPUHit.fEventId = eventID;
   aGPUHit.threadId = threadID;
 
+  aGPUHit.fLastStepOfTrack = isLastStep;
   // Fill the required data
   aGPUHit.fParentID           = aParentID;
   aGPUHit.fParticleType       = aParticleType;
diff --git a/include/AdePT/integration/AdePTConfigurationMessenger.hh b/include/AdePT/integration/AdePTConfigurationMessenger.hh
@@ -36,6 +36,8 @@ private:
   G4UIcmdWithAnInteger *fSetCUDAStackLimitCmd;
   G4UIcmdWithAnInteger *fSetCUDAHeapLimitCmd;
   G4UIcmdWithABool *fSetTrackInAllRegionsCmd;
+  G4UIcmdWithABool *fSetCallUserSteppingActionCmd;
+  G4UIcmdWithABool *fSetCallPostUserTrackingActionCmd;
   G4UIcmdWithAString *fAddRegionCmd;
   G4UIcmdWithABool *fActivateAdePTCmd;
   G4UIcmdWithAnInteger *fSetVerbosityCmd;
diff --git a/include/AdePT/integration/AdePTGeant4Integration.hh b/include/AdePT/integration/AdePTGeant4Integration.hh
@@ -54,7 +54,8 @@ public:
                              std::vector<G4LogicalVolume const *> &vecgeomLvToG4Map);
 
   /// @brief Reconstructs GPU hits on host and calls the user-defined sensitive detector code
-  void ProcessGPUHit(GPUHit const &hit);
+  void ProcessGPUStep(GPUHit const &hit, bool const callUserSteppingAction = false,
+                      bool const callPostUserTrackingaction = false);
 
   /// @brief Takes a range of tracks coming from the device and gives them back to Geant4
   template <typename Iterator>
diff --git a/include/AdePT/integration/AdePTTrackingManager.hh b/include/AdePT/integration/AdePTTrackingManager.hh
@@ -83,6 +83,10 @@ private:
   bool fAdePTInitialized{false};
 };
 
+#ifdef ASYNC_MODE
+std::shared_ptr<AsyncAdePT::AsyncAdePTTransport<AdePTGeant4Integration>> GetAdePTInstance();
+#endif
+
 //....oooOO0OOooo........oooOO0OOooo........oooOO0OOooo........oooOO0OOooo......
 
 #endif
diff --git a/include/AdePT/kernels/electrons.cuh b/include/AdePT/kernels/electrons.cuh
diff --git a/include/AdePT/kernels/gammas.cuh b/include/AdePT/kernels/gammas.cuh
diff --git a/src/AdePTConfigurationMessenger.cc b/src/AdePTConfigurationMessenger.cc
diff --git a/src/AdePTGeant4Integration.cpp b/src/AdePTGeant4Integration.cpp
diff --git a/src/AdePTTrackingManager.cc b/src/AdePTTrackingManager.cc

Original file line number	Diff line number	Diff line change
`@@ -155,15 +155,15 @@ __device__ void RecordHit(HostScoring *hostScoring_dev, int aParentID, char aPar`
`155`	`155`	`vecgeom::Vector3D<Precision> const &aPreMomentumDirection, double aPreEKin, double aPreCharge,`
`156`	`156`	`vecgeom::NavigationState const &aPostState, vecgeom::Vector3D<Precision> const &aPostPosition,`
`157`	`157`	`vecgeom::Vector3D<Precision> const &aPostMomentumDirection, double aPostEKin,`
`158`		`- double aPostCharge, unsigned int, short)`
	`158`	`+ double aPostCharge, unsigned int, short, bool)`
`159`	`159`	`{`
`160`	`160`	`// Acquire a hit slot`
`161`	`161`	`GPUHit &aGPUHit = *GetNextFreeHit(hostScoring_dev);`
`162`	`162`
`163`	`163`	`// Fill the required data`
`164`	`164`	`FillHit(aGPUHit, aParentID, aParticleType, aStepLength, aTotalEnergyDeposit, aPreState, aPrePosition,`
`165`	`165`	`aPreMomentumDirection, aPreEKin, aPreCharge, aPostState, aPostPosition, aPostMomentumDirection, aPostEKin,`
`166`		`- aPostCharge, 0, 0);`
	`166`	`+ aPostCharge, 0, 0, false);`
`167`	`167`	`}`
`168`	`168`
`169`	`169`	`/// @brief Account for the number of produced secondaries`
`@@ -202,7 +202,7 @@ inline void EndOfIteration(HostScoring &hostScoring, HostScoring *hostScoring_de`
`202`	`202`	`COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream));`
`203`	`203`	`// Process the hits on CPU`
`204`	`204`	`for (const auto &hit : hostScoring) {`
`205`		`- integration.ProcessGPUHit(hit);`
	`205`	`+ integration.ProcessGPUStep(hit);`
`206`	`206`	`}`
`207`	`207`	`}`
`208`	`208`	`}`
`@@ -220,7 +220,7 @@ inline void EndOfTransport(HostScoring &hostScoring, HostScoring *hostScoring_de`
`220`	`220`	`COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream));`
`221`	`221`	`// Process the last hits on CPU`
`222`	`222`	`for (const auto &hit : hostScoring) {`
`223`		`- integration.ProcessGPUHit(hit);`
	`223`	`+ integration.ProcessGPUStep(hit);`
`224`	`224`	`}`
`225`	`225`	`}`
`226`	`226`	`} // namespace adept_scoring`