Skip to content

Commit

Permalink
feat: support dynolog on-demand profiling and mt timer env
Browse files Browse the repository at this point in the history
  • Loading branch information
tangliang-mt committed Aug 8, 2024
1 parent 17003f6 commit a73acd5
Show file tree
Hide file tree
Showing 17 changed files with 70 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
url = https://github.com/fmtlib/fmt.git
[submodule "libkineto/third_party/dynolog"]
path = libkineto/third_party/dynolog
url = https://github.com/facebookincubator/dynolog.git
url = https://github.com/MooreThreads/dynolog.git
2 changes: 2 additions & 0 deletions libkineto/ipcfabric/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ struct LibkinetoRequest {
int type;
// size of pids
int n;
// current runloop state of the libkineto process
int currentRunloopState;
// job id of the libkineto process
int64_t jobid;
// pids of the process and its ancestors
Expand Down
5 changes: 5 additions & 0 deletions libkineto/src/ActivityProfilerController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,11 @@ void ActivityProfilerController::activateConfig(
asyncRequestConfig_ = nullptr;
}

// Reports the run-loop state of the underlying activity profiler as an
// integer code by forwarding the query to profiler_.
int ActivityProfilerController::getCurrentRunloopState() {
  VLOG(1) << "getCurrentRunloopState";
  const int runloopState = profiler_->getCurrentRunloopState();
  return runloopState;
}

void ActivityProfilerController::scheduleTrace(const Config& config) {
VLOG(1) << "scheduleTrace";
if (profiler_->isActive()) {
Expand Down
1 change: 1 addition & 0 deletions libkineto/src/ActivityProfilerController.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class ActivityProfilerController : public ConfigLoader::ConfigHandler {
bool canAcceptConfig() override;
void acceptConfig(const Config& config) override;
void scheduleTrace(const Config& config);
int getCurrentRunloopState() override;

// These API are used for Synchronous Tracing.
void prepareTrace(const Config& config);
Expand Down
7 changes: 4 additions & 3 deletions libkineto/src/ConfigLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@ ConfigLoader& ConfigLoader::instance() {

// Polls the daemon config loader for an on-demand config string.
// Returns an empty string when no daemon config loader is set (and, per the
// original note, if polling gets any errors). Otherwise returns whatever
// readOnDemandConfig() yields, after telling it which profiler kinds
// (event / activity) can currently accept a config and forwarding
// currentRunloopState.
std::string ConfigLoader::readOnDemandConfigFromDaemon(
time_point<system_clock> now) {
time_point<system_clock> now, int currentRunloopState) {
if (!daemonConfigLoader_) {
return "";
}
bool events = canHandlerAcceptConfig(ConfigKind::EventProfiler);
bool activities = canHandlerAcceptConfig(ConfigKind::ActivityProfiler);
return daemonConfigLoader_->readOnDemandConfig(events, activities);
return daemonConfigLoader_->readOnDemandConfig(events, activities, currentRunloopState);
}

int ConfigLoader::contextCountForGpu(uint32_t device) {
Expand Down Expand Up @@ -263,7 +263,8 @@ void ConfigLoader::configureFromSignal(
void ConfigLoader::configureFromDaemon(
time_point<system_clock> now,
Config& config) {
const std::string config_str = readOnDemandConfigFromDaemon(now);
const int currentRunloopState = getCurrentRunloopState();
const std::string config_str = readOnDemandConfigFromDaemon(now, currentRunloopState);
if (config_str.empty()) {
return;
}
Expand Down
14 changes: 13 additions & 1 deletion libkineto/src/ConfigLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class ConfigLoader {
virtual ~ConfigHandler() {}
virtual bool canAcceptConfig() = 0;
virtual void acceptConfig(const Config& cfg) = 0;
virtual int getCurrentRunloopState() = 0;
};

void addHandler(ConfigKind kind, ConfigHandler* handler) {
Expand All @@ -73,6 +74,17 @@ class ConfigLoader {
}
}

// Returns the run-loop state reported by the first handler registered under
// ConfigKind::ActivityProfiler, or -1 when no such handler is available.
// Holds updateThreadMutex_ while handlers_ is read.
int getCurrentRunloopState() {
  std::lock_guard<std::mutex> lock(updateThreadMutex_);
  const auto entry = handlers_.find(ConfigKind::ActivityProfiler);
  if (entry != handlers_.end()) {
    for (ConfigHandler* handler : entry->second) {
      if (handler != nullptr) {
        // Only the first usable handler is consulted.
        return handler->getCurrentRunloopState();
      }
    }
  }
  // Explicit fallback: without it, control would flow off the end of a
  // non-void function (undefined behavior) whenever no activity profiler
  // handler is registered. -1 is the same value EventProfilerController
  // reports from its own getCurrentRunloopState().
  return -1;
}

bool canHandlerAcceptConfig(ConfigKind kind) {
std::lock_guard<std::mutex> lock(updateThreadMutex_);
for (ConfigHandler* handler : handlers_[kind]) {
Expand Down Expand Up @@ -131,7 +143,7 @@ class ConfigLoader {
Config& config);

std::string readOnDemandConfigFromDaemon(
std::chrono::time_point<std::chrono::system_clock> now);
std::chrono::time_point<std::chrono::system_clock> now, int currentRunloopState);

const char* customConfigFileName();

Expand Down
4 changes: 2 additions & 2 deletions libkineto/src/DaemonConfigLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ std::string DaemonConfigLoader::readBaseConfig() {
return configClient->getLibkinetoBaseConfig();
}

std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities) {
std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities, int currentRunloopState) {
auto configClient = getConfigClient();
if (!configClient) {
LOG_EVERY_N(WARNING, 10) << "Failed to read config: No dyno config client";
Expand All @@ -46,7 +46,7 @@ std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities)
if (activities) {
config_type |= int(LibkinetoConfigType::ACTIVITIES);
}
return configClient->getLibkinetoOndemandConfig(config_type);
return configClient->getLibkinetoOndemandConfig(config_type, currentRunloopState);
}

int DaemonConfigLoader::gpuContextCount(uint32_t device) {
Expand Down
4 changes: 2 additions & 2 deletions libkineto/src/DaemonConfigLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class IDaemonConfigLoader {
virtual std::string readBaseConfig() = 0;

// Return a configuration string from the daemon, if one has been posted.
virtual std::string readOnDemandConfig(bool events, bool activities) = 0;
virtual std::string readOnDemandConfig(bool events, bool activities, int currentRunloopState) = 0;

// Returns the number of tracked contexts for this device. The daemon has a
// global view. If an unexpected error occurs, return -1.
Expand All @@ -45,7 +45,7 @@ class DaemonConfigLoader : public IDaemonConfigLoader {
std::string readBaseConfig() override;

// Return a configuration string from the daemon, if one has been posted.
std::string readOnDemandConfig(bool events, bool activities) override;
std::string readOnDemandConfig(bool events, bool activities, int currentRunloopState) override;

// Returns the number of tracked contexts for this device. The daemon has a
// global view. If an unexpected error occurs, return -1.
Expand Down
5 changes: 5 additions & 0 deletions libkineto/src/EventProfilerController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,11 @@ void EventProfilerController::acceptConfig(const Config& config) {
LOG(INFO) << "Received new on-demand config";
}

// ConfigLoader::ConfigHandler run-loop state query. This controller always
// answers -1.
int EventProfilerController::getCurrentRunloopState() {
  constexpr int kNoRunloopState = -1;
  VLOG(1) << "getCurrentRunloopState";
  return kNoRunloopState;
}

bool EventProfilerController::enableForDevice(Config& cfg) {
// FIXME: Use device unique id!
if (!cfg.eventProfilerEnabledForDevice(profiler_->device())) {
Expand Down
2 changes: 2 additions & 0 deletions libkineto/src/EventProfilerController.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class EventProfilerController : public ConfigLoader::ConfigHandler {

void acceptConfig(const Config& config) override;

int getCurrentRunloopState() override;

private:
explicit EventProfilerController(
MUcontext context,
Expand Down
3 changes: 2 additions & 1 deletion libkineto/src/IpcFabricConfigClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ std::string IpcFabricConfigClient::getLibkinetoBaseConfig() {
return "";
}

std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type) {
std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type, int currentRunloopState) {
if (!ipcFabricEnabled_) {
return "";
}
Expand All @@ -157,6 +157,7 @@ std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type) {
::dynolog::ipcfabric::LibkinetoRequest* req = (::dynolog::ipcfabric::LibkinetoRequest*)malloc(sizeof(::dynolog::ipcfabric::LibkinetoRequest) + sizeof(int32_t) * size);
req->type = type;
req->n = size;
req->currentRunloopState = currentRunloopState;
req->jobid = jobId_;
for (int i = 0; i < size; i++) {
req->pids[i] = pids_[i];
Expand Down
2 changes: 1 addition & 1 deletion libkineto/src/IpcFabricConfigClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class IpcFabricConfigClient {

// Get on demand configurations for tracing/counter collection
// type is a bit mask, please see LibkinetoConfigType encoding above.
virtual std::string getLibkinetoOndemandConfig(int32_t type);
virtual std::string getLibkinetoOndemandConfig(int32_t type, int currentRunloopState);

void setIpcFabricEnabled(bool enabled) {
ipcFabricEnabled_ = enabled;
Expand Down
4 changes: 3 additions & 1 deletion libkineto/src/MuptiActivityApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ void MuptiActivityApi::setDeviceBufferPoolLimit(size_t limit) {
// When built with HAS_MUPTI, enables and then immediately disables the
// MUPTI concurrent-kernel activity kind; otherwise this is a no-op.
// NOTE(review): presumably the enable call is what forces the MUPTI library
// to load and the disable keeps activity collection off afterwards — confirm.
void MuptiActivityApi::forceLoadMupti() {
#ifdef HAS_MUPTI
MUPTI_CALL(muptiActivityEnable(MUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
MUPTI_CALL(muptiActivityDisable(MUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
#endif
}

Expand All @@ -163,7 +164,8 @@ void MuptiActivityApi::bufferRequested(
uint8_t** buffer, size_t* size, size_t* maxNumRecords) {
std::lock_guard<std::mutex> guard(mutex_);
if (allocatedGpuTraceBuffers_.size() >= maxGpuBufferCount_) {
stopCollection = true;
// Intentionally commented out so that collection is not stopped when the buffer limit is reached.
// stopCollection = true;
LOG(WARNING) << "Exceeded max GPU buffer count ("
<< allocatedGpuTraceBuffers_.size()
<< " > " << maxGpuBufferCount_
Expand Down
12 changes: 12 additions & 0 deletions libkineto/src/MuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,18 @@ void MuptiActivityProfiler::startTraceInternal(
void MuptiActivityProfiler::stopTraceInternal(
const time_point<system_clock>& now) {
captureWindowEndTime_ = libkineto::timeSinceEpoch(now);
bool enable_mt_timer_gpu_events = (getenv("MT_TIMER_GPU_EVENTS") != nullptr);
int captureWindowLen = 60; // set window len as 60s to avoid too much replicated data
if(enable_mt_timer_gpu_events) {
if (getenv("MT_TIMER_CAPTURE_WINDOW_LEN")) {
try {
captureWindowLen = std::stoi(getenv("MT_TIMER_CAPTURE_WINDOW_LEN"));
} catch (const std::invalid_argument& e) {
LOG(ERROR) << "Invalid value for MT_TIMER_CAPTURE_WINDOW_LEN. Using default 60s.\n";
}
}
captureWindowEndTime_ = captureWindowStartTime_ + 1000000*captureWindowLen;
}
#if defined(HAS_MUPTI) || defined(HAS_ROCTRACER)
if (!cpuOnly_) {
time_point<system_clock> timestamp;
Expand Down
13 changes: 13 additions & 0 deletions libkineto/src/MuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,19 @@ class MuptiActivityProfiler {
return currentRunloopState_ != RunloopState::WaitForRequest;
}

// Maps the current RunloopState to an integer code:
//   WaitForRequest -> 0, Warmup -> 1, CollectTrace -> 2, ProcessTrace -> 3.
// Returns -1 if the state holds a value outside those enumerators.
int getCurrentRunloopState() const {
  switch (currentRunloopState_) {
    case RunloopState::WaitForRequest:
      return 0;
    case RunloopState::Warmup:
      return 1;
    case RunloopState::CollectTrace:
      return 2;
    case RunloopState::ProcessTrace:
      return 3;
  }
  // No `default:` label on purpose, so the compiler can still warn when a new
  // enumerator is not handled above. This return keeps the function from
  // flowing off its end (undefined behavior) for an unexpected value.
  return -1;
}

// Invoke at a regular interval to perform profiling activities.
// When not active, an interval of 1-5 seconds is probably fine,
// depending on required warm-up time and delayed start time.
Expand Down
2 changes: 1 addition & 1 deletion libkineto/src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ void libkineto_init(bool cpuOnly, bool logOnError) {
}
}

if (shouldPreloadMuptiInstrumentation()) {
if (!cpuOnly && shouldPreloadMuptiInstrumentation()) {
MuptiActivityApi::forceLoadMupti();
}
#endif // HAS_MUPTI
Expand Down
2 changes: 1 addition & 1 deletion libkineto/third_party/dynolog

0 comments on commit a73acd5

Please sign in to comment.