Skip to content

Commit a73acd5

Browse files
author
tangliang-mt
committed
feat: support dynolog on-demand profiling and mt timer env
1 parent 17003f6 commit a73acd5

17 files changed

+70
-14
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
url = https://github.com/fmtlib/fmt.git
77
[submodule "libkineto/third_party/dynolog"]
88
path = libkineto/third_party/dynolog
9-
url = https://github.com/facebookincubator/dynolog.git
9+
url = https://github.com/MooreThreads/dynolog.git

libkineto/ipcfabric/Utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ struct LibkinetoRequest {
2727
int type;
2828
// size of pids
2929
int n;
30+
// current run-loop state reported by the activity profiler
31+
int currentRunloopState;
3032
// job id of the libkineto process
3133
int64_t jobid;
3234
// pids of the process and its ancestors

libkineto/src/ActivityProfilerController.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ void ActivityProfilerController::activateConfig(
259259
asyncRequestConfig_ = nullptr;
260260
}
261261

262+
int ActivityProfilerController::getCurrentRunloopState() {
263+
VLOG(1) << "getCurrentRunloopState";
264+
return profiler_->getCurrentRunloopState();
265+
}
266+
262267
void ActivityProfilerController::scheduleTrace(const Config& config) {
263268
VLOG(1) << "scheduleTrace";
264269
if (profiler_->isActive()) {

libkineto/src/ActivityProfilerController.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class ActivityProfilerController : public ConfigLoader::ConfigHandler {
5555
bool canAcceptConfig() override;
5656
void acceptConfig(const Config& config) override;
5757
void scheduleTrace(const Config& config);
58+
int getCurrentRunloopState() override;
5859

5960
// These API are used for Synchronous Tracing.
6061
void prepareTrace(const Config& config);

libkineto/src/ConfigLoader.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,13 @@ ConfigLoader& ConfigLoader::instance() {
125125

126126
// return an empty string if polling gets any errors. Otherwise a config string.
127127
std::string ConfigLoader::readOnDemandConfigFromDaemon(
128-
time_point<system_clock> now) {
128+
time_point<system_clock> now, int currentRunloopState) {
129129
if (!daemonConfigLoader_) {
130130
return "";
131131
}
132132
bool events = canHandlerAcceptConfig(ConfigKind::EventProfiler);
133133
bool activities = canHandlerAcceptConfig(ConfigKind::ActivityProfiler);
134-
return daemonConfigLoader_->readOnDemandConfig(events, activities);
134+
return daemonConfigLoader_->readOnDemandConfig(events, activities, currentRunloopState);
135135
}
136136

137137
int ConfigLoader::contextCountForGpu(uint32_t device) {
@@ -263,7 +263,8 @@ void ConfigLoader::configureFromSignal(
263263
void ConfigLoader::configureFromDaemon(
264264
time_point<system_clock> now,
265265
Config& config) {
266-
const std::string config_str = readOnDemandConfigFromDaemon(now);
266+
const int currentRunloopState = getCurrentRunloopState();
267+
const std::string config_str = readOnDemandConfigFromDaemon(now, currentRunloopState);
267268
if (config_str.empty()) {
268269
return;
269270
}

libkineto/src/ConfigLoader.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class ConfigLoader {
4747
virtual ~ConfigHandler() {}
4848
virtual bool canAcceptConfig() = 0;
4949
virtual void acceptConfig(const Config& cfg) = 0;
50+
virtual int getCurrentRunloopState() = 0;
5051
};
5152

5253
void addHandler(ConfigKind kind, ConfigHandler* handler) {
@@ -73,6 +74,17 @@ class ConfigLoader {
7374
}
7475
}
7576

77+
int getCurrentRunloopState() {
78+
std::lock_guard<std::mutex> lock(updateThreadMutex_);
79+
for (auto& key_val : handlers_) {
80+
if (key_val.first == ConfigKind::ActivityProfiler) {
81+
for (ConfigHandler* handler : key_val.second) {
82+
return handler->getCurrentRunloopState();
83+
}
84+
}
85+
}
86+
}
87+
7688
bool canHandlerAcceptConfig(ConfigKind kind) {
7789
std::lock_guard<std::mutex> lock(updateThreadMutex_);
7890
for (ConfigHandler* handler : handlers_[kind]) {
@@ -131,7 +143,7 @@ class ConfigLoader {
131143
Config& config);
132144

133145
std::string readOnDemandConfigFromDaemon(
134-
std::chrono::time_point<std::chrono::system_clock> now);
146+
std::chrono::time_point<std::chrono::system_clock> now, int currentRunloopState);
135147

136148
const char* customConfigFileName();
137149

libkineto/src/DaemonConfigLoader.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ std::string DaemonConfigLoader::readBaseConfig() {
3333
return configClient->getLibkinetoBaseConfig();
3434
}
3535

36-
std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities) {
36+
std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities, int currentRunloopState) {
3737
auto configClient = getConfigClient();
3838
if (!configClient) {
3939
LOG_EVERY_N(WARNING, 10) << "Failed to read config: No dyno config client";
@@ -46,7 +46,7 @@ std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities)
4646
if (activities) {
4747
config_type |= int(LibkinetoConfigType::ACTIVITIES);
4848
}
49-
return configClient->getLibkinetoOndemandConfig(config_type);
49+
return configClient->getLibkinetoOndemandConfig(config_type, currentRunloopState);
5050
}
5151

5252
int DaemonConfigLoader::gpuContextCount(uint32_t device) {

libkineto/src/DaemonConfigLoader.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class IDaemonConfigLoader {
2525
virtual std::string readBaseConfig() = 0;
2626

2727
// Return a configuration string from the daemon, if one has been posted.
28-
virtual std::string readOnDemandConfig(bool events, bool activities) = 0;
28+
virtual std::string readOnDemandConfig(bool events, bool activities, int currentRunloopState) = 0;
2929

3030
// Returns the number of tracked contexts for this device. The daemon has a
3131
// global view. If an unexpected error occurs, return -1.
@@ -45,7 +45,7 @@ class DaemonConfigLoader : public IDaemonConfigLoader {
4545
std::string readBaseConfig() override;
4646

4747
// Return a configuration string from the daemon, if one has been posted.
48-
std::string readOnDemandConfig(bool events, bool activities) override;
48+
std::string readOnDemandConfig(bool events, bool activities, int currentRunloopState) override;
4949

5050
// Returns the number of tracked contexts for this device. The daemon has a
5151
// global view. If an unexpected error occurs, return -1.

libkineto/src/EventProfilerController.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,11 @@ void EventProfilerController::acceptConfig(const Config& config) {
284284
LOG(INFO) << "Received new on-demand config";
285285
}
286286

287+
// ConfigHandler hook for the event profiler. Always returns -1.
// NOTE(review): -1 presumably means "no run-loop state available" — confirm
// against the consumer of this value.
int EventProfilerController::getCurrentRunloopState() {
  constexpr int kNoRunloopState = -1;
  VLOG(1) << "getCurrentRunloopState";
  return kNoRunloopState;
}
291+
287292
bool EventProfilerController::enableForDevice(Config& cfg) {
288293
// FIXME: Use device unique id!
289294
if (!cfg.eventProfilerEnabledForDevice(profiler_->device())) {

libkineto/src/EventProfilerController.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ class EventProfilerController : public ConfigLoader::ConfigHandler {
4949

5050
void acceptConfig(const Config& config) override;
5151

52+
int getCurrentRunloopState() override;
53+
5254
private:
5355
explicit EventProfilerController(
5456
MUcontext context,

libkineto/src/IpcFabricConfigClient.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ std::string IpcFabricConfigClient::getLibkinetoBaseConfig() {
143143
return "";
144144
}
145145

146-
std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type) {
146+
std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type, int currentRunloopState) {
147147
if (!ipcFabricEnabled_) {
148148
return "";
149149
}
@@ -157,6 +157,7 @@ std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type) {
157157
::dynolog::ipcfabric::LibkinetoRequest* req = (::dynolog::ipcfabric::LibkinetoRequest*)malloc(sizeof(::dynolog::ipcfabric::LibkinetoRequest) + sizeof(int32_t) * size);
158158
req->type = type;
159159
req->n = size;
160+
req->currentRunloopState = currentRunloopState;
160161
req->jobid = jobId_;
161162
for (int i = 0; i < size; i++) {
162163
req->pids[i] = pids_[i];

libkineto/src/IpcFabricConfigClient.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class IpcFabricConfigClient {
4444

4545
// Get on demand configurations for tracing/counter collection
4646
// type is a bit mask, please see LibkinetoConfigType encoding above.
47-
virtual std::string getLibkinetoOndemandConfig(int32_t type);
47+
virtual std::string getLibkinetoOndemandConfig(int32_t type, int currentRunloopState);
4848

4949
void setIpcFabricEnabled(bool enabled) {
5050
ipcFabricEnabled_ = enabled;

libkineto/src/MuptiActivityApi.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ void MuptiActivityApi::setDeviceBufferPoolLimit(size_t limit) {
140140
// When built with HAS_MUPTI, enables the concurrent-kernel activity kind and
// then immediately disables it again; without HAS_MUPTI the body is empty.
// NOTE(review): presumably the enable call exists only to force the MUPTI
// library to load, and the disable call keeps kernel activity collection from
// staying on afterwards — confirm against the MUPTI documentation.
void MuptiActivityApi::forceLoadMupti() {
141141
#ifdef HAS_MUPTI
142142
MUPTI_CALL(muptiActivityEnable(MUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
143+
MUPTI_CALL(muptiActivityDisable(MUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
143144
#endif
144145
}
145146

@@ -163,7 +164,8 @@ void MuptiActivityApi::bufferRequested(
163164
uint8_t** buffer, size_t* size, size_t* maxNumRecords) {
164165
std::lock_guard<std::mutex> guard(mutex_);
165166
if (allocatedGpuTraceBuffers_.size() >= maxGpuBufferCount_) {
166-
stopCollection = true;
167+
// comment this to avoid stopping the collection when the buffer is full
168+
// stopCollection = true;
167169
LOG(WARNING) << "Exceeded max GPU buffer count ("
168170
<< allocatedGpuTraceBuffers_.size()
169171
<< " > " << maxGpuBufferCount_

libkineto/src/MuptiActivityProfiler.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,18 @@ void MuptiActivityProfiler::startTraceInternal(
918918
void MuptiActivityProfiler::stopTraceInternal(
919919
const time_point<system_clock>& now) {
920920
captureWindowEndTime_ = libkineto::timeSinceEpoch(now);
921+
bool enable_mt_timer_gpu_events = (getenv("MT_TIMER_GPU_EVENTS") != nullptr);
922+
int captureWindowLen = 60; // set window len as 60s to avoid too much replicated data
923+
if(enable_mt_timer_gpu_events) {
924+
if (getenv("MT_TIMER_CAPTURE_WINDOW_LEN")) {
925+
try {
926+
captureWindowLen = std::stoi(getenv("MT_TIMER_CAPTURE_WINDOW_LEN"));
927+
} catch (const std::invalid_argument& e) {
928+
LOG(ERROR) << "Invalid value for MT_TIMER_CAPTURE_WINDOW_LEN. Using default 60s.\n";
929+
}
930+
}
931+
captureWindowEndTime_ = captureWindowStartTime_ + 1000000*captureWindowLen;
932+
}
921933
#if defined(HAS_MUPTI) || defined(HAS_ROCTRACER)
922934
if (!cpuOnly_) {
923935
time_point<system_clock> timestamp;

libkineto/src/MuptiActivityProfiler.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,19 @@ class MuptiActivityProfiler {
116116
return currentRunloopState_ != RunloopState::WaitForRequest;
117117
}
118118

119+
int getCurrentRunloopState() const {
120+
switch (currentRunloopState_) {
121+
case RunloopState::WaitForRequest:
122+
return 0;
123+
case RunloopState::Warmup:
124+
return 1;
125+
case RunloopState::CollectTrace:
126+
return 2;
127+
case RunloopState::ProcessTrace:
128+
return 3;
129+
}
130+
}
131+
119132
// Invoke at a regular interval to perform profiling activities.
120133
// When not active, an interval of 1-5 seconds is probably fine,
121134
// depending on required warm-up time and delayed start time.

libkineto/src/init.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ void libkineto_init(bool cpuOnly, bool logOnError) {
166166
}
167167
}
168168

169-
if (shouldPreloadMuptiInstrumentation()) {
169+
if (!cpuOnly && shouldPreloadMuptiInstrumentation()) {
170170
MuptiActivityApi::forceLoadMupti();
171171
}
172172
#endif // HAS_MUPTI

libkineto/third_party/dynolog

0 commit comments

Comments
 (0)