Skip to content

Commit b24ef76

Browse files
enable changing the heap size on device via input parameter (apt-sim#355)
This PR allows the heap size on the device to be adjusted via an input parameter. This is needed for the ATLAS geometry. Note that this may become redundant once the heap usage on the device is fixed, but for now it is needed.
1 parent 33f314d commit b24ef76

8 files changed

+25
-1
lines changed

include/AdePT/core/AdePTConfiguration.hh

+3
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public:
3737
void SetMillionsOfHitSlots(double millionSlots) { fMillionsOfHitSlots = millionSlots; }
3838
void SetHitBufferFlushThreshold(float threshold) { fHitBufferFlushThreshold = threshold; }
3939
void SetCUDAStackLimit(int limit) { fCUDAStackLimit = limit; }
40+
/// @brief Set the CUDA device heap limit (stored in fCUDAHeapLimit, read back via GetCUDAHeapLimit()).
/// NOTE(review): the transports forward this value to cudaDeviceSetLimit(cudaLimitMallocHeapSize, ...)
/// only when it is > 0. Presumably the unit is bytes, in which case an int caps the limit below 2 GiB — confirm.
void SetCUDAHeapLimit(int limit) { fCUDAHeapLimit = limit; }
4041

4142
// We temporarily load VecGeom geometry from GDML
4243
void SetVecGeomGDML(std::string filename) { fVecGeomGDML = filename; }
@@ -47,6 +48,7 @@ public:
4748
int GetVerbosity() { return fVerbosity; };
4849
int GetTransportBufferThreshold() { return fTransportBufferThreshold; }
4950
int GetCUDAStackLimit() { return fCUDAStackLimit; }
51+
int GetCUDAHeapLimit() { return fCUDAHeapLimit; }
5052
float GetHitBufferFlushThreshold() { return fHitBufferFlushThreshold; }
5153
double GetMillionsOfTrackSlots() { return fMillionsOfTrackSlots; }
5254
double GetMillionsOfHitSlots() { return fMillionsOfHitSlots; }
@@ -62,6 +64,7 @@ private:
6264
int fVerbosity{0};
6365
int fTransportBufferThreshold{200};
6466
int fCUDAStackLimit{0};
67+
int fCUDAHeapLimit{0};
6568
float fHitBufferFlushThreshold{0.8};
6669
double fMillionsOfTrackSlots{1};
6770
double fMillionsOfHitSlots{1};

include/AdePT/core/AdePTTransport.h

+2
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ class AdePTTransport : public AdePTTransportInterface {
6767
void SetGPURegionNames(std::vector<std::string> const *regionNames) { fGPURegionNames = regionNames; }
6868
/// @brief Set CUDA device stack limit
6969
void SetCUDAStackLimit(int limit) { fCUDAStackLimit = limit; }
70+
/// @brief Set CUDA device heap limit
void SetCUDAHeapLimit(int limit) { fCUDAHeapLimit = limit; }
7071
std::vector<std::string> const *GetGPURegionNames() { return fGPURegionNames; }
7172
/// @brief Create material-cut couple index array
7273
/// @brief Initialize service and copy geometry & physics data on device
@@ -89,6 +90,7 @@ class AdePTTransport : public AdePTTransportInterface {
8990
size_t fBufferThreshold{20}; ///< Buffer threshold for flushing AdePT transport buffer
9091
int fDebugLevel{1}; ///< Debug level
9192
int fCUDAStackLimit{0}; ///< CUDA device stack limit
93+
int fCUDAHeapLimit{0}; ///< CUDA device heap limit
9294
GPUstate *fGPUstate{nullptr}; ///< CUDA state placeholder
9395
AdeptScoring *fScoring{nullptr}; ///< User scoring object
9496
AdeptScoring *fScoring_dev{nullptr}; ///< Device ptr for scoring data

include/AdePT/core/AdePTTransport.icc

+5
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ AdePTTransport<IntegrationLayer>::AdePTTransport(AdePTConfiguration &configurati
4848
fTrackInAllRegions = configuration.GetTrackInAllRegions();
4949
fGPURegionNames = configuration.GetGPURegionNames();
5050
fCUDAStackLimit = configuration.GetCUDAStackLimit();
51+
fCUDAHeapLimit = configuration.GetCUDAHeapLimit();
5152
fCapacity = 1024 * 1024 * configuration.GetMillionsOfTrackSlots() / configuration.GetNumThreads();
5253
fHitBufferCapacity = 1024 * 1024 * configuration.GetMillionsOfHitSlots() / configuration.GetNumThreads();
5354

@@ -99,6 +100,10 @@ bool AdePTTransport<IntegrationLayer>::InitializeGeometry(const vecgeom::cxx::VP
99100
std::cout << "CUDA Device stack limit: " << fCUDAStackLimit << "\n";
100101
cudaDeviceSetLimit(cudaLimitStackSize, fCUDAStackLimit);
101102
}
103+
if (fCUDAHeapLimit > 0) {
104+
std::cout << "CUDA Device heap limit: " << fCUDAHeapLimit << "\n";
105+
cudaDeviceSetLimit(cudaLimitMallocHeapSize, fCUDAHeapLimit);
106+
}
102107
bool success = true;
103108
#ifdef ADEPT_USE_SURF
104109
#ifdef ADEPT_USE_SURF_SINGLE

include/AdePT/core/AdePTTransportInterface.hh

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public:
4141
virtual void SetGPURegionNames(std::vector<std::string> const *regionNames) = 0;
4242
virtual std::vector<std::string> const *GetGPURegionNames() = 0;
4343
virtual void SetCUDAStackLimit(int limit) = 0;
44+
virtual void SetCUDAHeapLimit(int limit) = 0;
4445
/// @brief Initialize service and copy geometry & physics data on device
4546
virtual void Initialize(bool common_data = false) = 0;
4647
/// @brief Initialize the ApplyCuts flag on device

include/AdePT/core/AsyncAdePTTransport.hh

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ private:
4646
unsigned int fScoringCapacity{0}; ///< Number of hit slots to allocate on device
4747
int fDebugLevel{0}; ///< Debug level
4848
int fCUDAStackLimit{0}; ///< CUDA device stack limit
49+
int fCUDAHeapLimit{0}; ///< CUDA device heap limit
4950
std::vector<IntegrationLayer> fIntegrationLayerObjects;
5051
std::unique_ptr<GPUstate, GPUstateDeleter> fGPUstate{nullptr}; ///< CUDA state placeholder
5152
std::vector<AdePTScoring> fScoring; ///< User scoring objects per G4 worker
@@ -90,6 +91,7 @@ public:
9091
bool GetTrackInAllRegions() const override { return fTrackInAllRegions; }
9192
void SetGPURegionNames(std::vector<std::string> const *regionNames) override { fGPURegionNames = regionNames; }
9293
void SetCUDAStackLimit(int limit) override {};
94+
/// No effect: fCUDAHeapLimit is initialized from the AdePTConfiguration in the constructor
void SetCUDAHeapLimit(int limit) override {};
9395
std::vector<std::string> const *GetGPURegionNames() override { return fGPURegionNames; }
9496
/// No effect
9597
void Initialize(bool) override {}

include/AdePT/core/AsyncAdePTTransport.icc

+6-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ AsyncAdePTTransport<IntegrationLayer>::AsyncAdePTTransport(AdePTConfiguration &c
8484
fScoringCapacity{(uint)(1024 * 1024 * configuration.GetMillionsOfHitSlots())},
8585
fDebugLevel{configuration.GetVerbosity()}, fIntegrationLayerObjects(fNThread), fEventStates(fNThread),
8686
fGPUNetEnergy(fNThread, 0.0), fTrackInAllRegions{configuration.GetTrackInAllRegions()},
87-
fGPURegionNames{configuration.GetGPURegionNames()}, fCUDAStackLimit{configuration.GetCUDAStackLimit()}
87+
fGPURegionNames{configuration.GetGPURegionNames()}, fCUDAStackLimit{configuration.GetCUDAStackLimit()},
88+
fCUDAHeapLimit{configuration.GetCUDAHeapLimit()}
8889
{
8990
if (fNThread > kMaxThreads)
9091
throw std::invalid_argument("AsyncAdePTTransport limited to " + std::to_string(kMaxThreads) + " threads");
@@ -176,6 +177,10 @@ bool AsyncAdePTTransport<IntegrationLayer>::InitializeGeometry(const vecgeom::cx
176177
std::cout << "CUDA Device stack limit: " << fCUDAStackLimit << "\n";
177178
cudaDeviceSetLimit(cudaLimitStackSize, fCUDAStackLimit);
178179
}
180+
if (fCUDAHeapLimit > 0) {
181+
std::cout << "CUDA Device heap limit: " << fCUDAHeapLimit << "\n";
182+
cudaDeviceSetLimit(cudaLimitMallocHeapSize, fCUDAHeapLimit);
183+
}
179184
cudaManager.LoadGeometry(world);
180185
auto world_dev = cudaManager.Synchronize();
181186
// Initialize BVH

include/AdePT/integration/AdePTConfigurationMessenger.hh

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ private:
3434

3535
G4UIdirectory *fDir;
3636
G4UIcmdWithAnInteger *fSetCUDAStackLimitCmd;
37+
G4UIcmdWithAnInteger *fSetCUDAHeapLimitCmd;
3738
G4UIcmdWithABool *fSetTrackInAllRegionsCmd;
3839
G4UIcmdWithAString *fAddRegionCmd;
3940
G4UIcmdWithABool *fActivateAdePTCmd;

src/AdePTConfigurationMessenger.cc

+5
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ AdePTConfigurationMessenger::AdePTConfigurationMessenger(AdePTConfiguration *ade
5858

5959
fSetCUDAStackLimitCmd = new G4UIcmdWithAnInteger("/adept/setCUDAStackLimit", this);
6060
fSetCUDAStackLimitCmd->SetGuidance("Set the CUDA device stack limit");
61+
fSetCUDAHeapLimitCmd = new G4UIcmdWithAnInteger("/adept/setCUDAHeapLimit", this);
62+
fSetCUDAHeapLimitCmd->SetGuidance("Set the CUDA device heap limit");
6163
}
6264

6365
//....oooOO0OOooo........oooOO0OOooo........oooOO0OOooo........oooOO0OOooo......
@@ -66,6 +68,7 @@ AdePTConfigurationMessenger::~AdePTConfigurationMessenger()
6668
{
6769
delete fDir;
6870
delete fSetCUDAStackLimitCmd;
71+
delete fSetCUDAHeapLimitCmd;
6972
delete fSetTrackInAllRegionsCmd;
7073
delete fAddRegionCmd;
7174
delete fActivateAdePTCmd;
@@ -101,6 +104,8 @@ void AdePTConfigurationMessenger::SetNewValue(G4UIcommand *command, G4String new
101104
fAdePTConfiguration->SetVecGeomGDML(newValue);
102105
} else if (command == fSetCUDAStackLimitCmd) {
103106
fAdePTConfiguration->SetCUDAStackLimit(fSetCUDAStackLimitCmd->GetNewIntValue(newValue));
107+
} else if (command == fSetCUDAHeapLimitCmd) {
108+
fAdePTConfiguration->SetCUDAHeapLimit(fSetCUDAHeapLimitCmd->GetNewIntValue(newValue));
104109
}
105110
}
106111

0 commit comments

Comments
 (0)