diff --git a/.gitmodules b/.gitmodules
index c50e122..f3e2107 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -6,4 +6,4 @@
 	url = https://github.com/fmtlib/fmt.git
 [submodule "libkineto/third_party/dynolog"]
 	path = libkineto/third_party/dynolog
-	url = https://github.com/facebookincubator/dynolog.git
+	url = https://github.com/MooreThreads/dynolog.git
diff --git a/libkineto/ipcfabric/Utils.h b/libkineto/ipcfabric/Utils.h
index c32a8fc..774a642 100644
--- a/libkineto/ipcfabric/Utils.h
+++ b/libkineto/ipcfabric/Utils.h
@@ -27,6 +27,10 @@ struct LibkinetoRequest {
   int type;
   // size of pids
   int n;
+  // current runloop state code of the activity profiler (-1 if unavailable)
+  // NOTE(review): inserting a field here changes the struct layout; presumably
+  // the daemon must be built from the matching dynolog revision - confirm.
+  int currentRunloopState;
   // job id of the libkineto process
   int64_t jobid;
   // pids of the process and its ancestors
diff --git a/libkineto/src/ActivityProfilerController.cpp b/libkineto/src/ActivityProfilerController.cpp
index e7c5f6a..236b89f 100644
--- a/libkineto/src/ActivityProfilerController.cpp
+++ b/libkineto/src/ActivityProfilerController.cpp
@@ -259,6 +259,12 @@ void ActivityProfilerController::activateConfig(
   asyncRequestConfig_ = nullptr;
 }
 
+// Returns the profiler's current runloop state as an integer code.
+int ActivityProfilerController::getCurrentRunloopState() {
+  VLOG(1) << "getCurrentRunloopState";
+  return profiler_->getCurrentRunloopState();
+}
+
 void ActivityProfilerController::scheduleTrace(const Config& config) {
   VLOG(1) << "scheduleTrace";
   if (profiler_->isActive()) {
diff --git a/libkineto/src/ActivityProfilerController.h b/libkineto/src/ActivityProfilerController.h
index 672dce4..96ae53b 100644
--- a/libkineto/src/ActivityProfilerController.h
+++ b/libkineto/src/ActivityProfilerController.h
@@ -55,6 +55,7 @@ class ActivityProfilerController : public ConfigLoader::ConfigHandler {
   bool canAcceptConfig() override;
   void acceptConfig(const Config& config) override;
   void scheduleTrace(const Config& config);
+  int getCurrentRunloopState() override;
 
   // These API are used for Synchronous Tracing.
   void prepareTrace(const Config& config);
diff --git a/libkineto/src/ConfigLoader.cpp b/libkineto/src/ConfigLoader.cpp
index 72fb04e..0dc6f2a 100644
--- a/libkineto/src/ConfigLoader.cpp
+++ b/libkineto/src/ConfigLoader.cpp
@@ -125,13 +125,13 @@ ConfigLoader& ConfigLoader::instance() {
 
 // return an empty string if polling gets any errors. Otherwise a config string.
 std::string ConfigLoader::readOnDemandConfigFromDaemon(
-    time_point now) {
+    time_point now, int currentRunloopState) {
   if (!daemonConfigLoader_) {
     return "";
   }
   bool events = canHandlerAcceptConfig(ConfigKind::EventProfiler);
   bool activities = canHandlerAcceptConfig(ConfigKind::ActivityProfiler);
-  return daemonConfigLoader_->readOnDemandConfig(events, activities);
+  return daemonConfigLoader_->readOnDemandConfig(events, activities, currentRunloopState);
 }
 
 int ConfigLoader::contextCountForGpu(uint32_t device) {
@@ -263,7 +263,8 @@ void ConfigLoader::configureFromSignal(
 void ConfigLoader::configureFromDaemon(
     time_point now,
     Config& config) {
-  const std::string config_str = readOnDemandConfigFromDaemon(now);
+  const int currentRunloopState = getCurrentRunloopState();
+  const std::string config_str = readOnDemandConfigFromDaemon(now, currentRunloopState);
   if (config_str.empty()) {
     return;
   }
diff --git a/libkineto/src/ConfigLoader.h b/libkineto/src/ConfigLoader.h
index ce5ffe9..fb6af1b 100644
--- a/libkineto/src/ConfigLoader.h
+++ b/libkineto/src/ConfigLoader.h
@@ -47,6 +47,9 @@ class ConfigLoader {
     virtual ~ConfigHandler() {}
     virtual bool canAcceptConfig() = 0;
     virtual void acceptConfig(const Config& cfg) = 0;
+    // Returns an integer code describing the handler's current runloop state.
+    // EventProfilerController always returns -1.
+    virtual int getCurrentRunloopState() = 0;
   };
 
   void addHandler(ConfigKind kind, ConfigHandler* handler) {
@@ -73,6 +76,19 @@ class ConfigLoader {
     }
   }
 
+  // Returns the runloop state code reported by the first registered
+  // ActivityProfiler handler, or -1 when no such handler is registered.
+  // Every path returns a value: flowing off the end of a non-void function
+  // is undefined behavior.
+  int getCurrentRunloopState() {
+    std::lock_guard lock(updateThreadMutex_);
+    const auto it = handlers_.find(ConfigKind::ActivityProfiler);
+    if (it != handlers_.end() && !it->second.empty()) {
+      return (*it->second.begin())->getCurrentRunloopState();
+    }
+    return -1;
+  }
+
   bool canHandlerAcceptConfig(ConfigKind kind) {
     std::lock_guard lock(updateThreadMutex_);
     for (ConfigHandler* handler : handlers_[kind]) {
@@ -131,7 +147,7 @@ class ConfigLoader {
       Config& config);
 
   std::string readOnDemandConfigFromDaemon(
-      std::chrono::time_point now);
+      std::chrono::time_point now, int currentRunloopState);
 
   const char* customConfigFileName();
 
diff --git a/libkineto/src/DaemonConfigLoader.cpp b/libkineto/src/DaemonConfigLoader.cpp
index 4d2ca4c..2b56b0d 100644
--- a/libkineto/src/DaemonConfigLoader.cpp
+++ b/libkineto/src/DaemonConfigLoader.cpp
@@ -33,7 +33,7 @@ std::string DaemonConfigLoader::readBaseConfig() {
   return configClient->getLibkinetoBaseConfig();
 }
 
-std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities) {
+std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities, int currentRunloopState) {
   auto configClient = getConfigClient();
   if (!configClient) {
     LOG_EVERY_N(WARNING, 10) << "Failed to read config: No dyno config client";
@@ -46,7 +46,7 @@ std::string DaemonConfigLoader::readOnDemandConfig(bool events, bool activities)
   if (activities) {
     config_type |= int(LibkinetoConfigType::ACTIVITIES);
   }
-  return configClient->getLibkinetoOndemandConfig(config_type);
+  return configClient->getLibkinetoOndemandConfig(config_type, currentRunloopState);
 }
 
 int DaemonConfigLoader::gpuContextCount(uint32_t device) {
diff --git a/libkineto/src/DaemonConfigLoader.h b/libkineto/src/DaemonConfigLoader.h
index 1d920e1..91d040d 100644
--- a/libkineto/src/DaemonConfigLoader.h
+++ b/libkineto/src/DaemonConfigLoader.h
@@ -25,7 +25,7 @@ class IDaemonConfigLoader {
   virtual std::string readBaseConfig() = 0;
 
   // Return a configuration string from the daemon, if one has been posted.
-  virtual std::string readOnDemandConfig(bool events, bool activities) = 0;
+  virtual std::string readOnDemandConfig(bool events, bool activities, int currentRunloopState) = 0;
 
   // Returns the number of tracked contexts for this device. The daemon has a
   // global view. If an unexpedted error occurs, return -1.
@@ -45,7 +45,7 @@ class DaemonConfigLoader : public IDaemonConfigLoader {
   std::string readBaseConfig() override;
 
   // Return a configuration string from the daemon, if one has been posted.
-  std::string readOnDemandConfig(bool events, bool activities) override;
+  std::string readOnDemandConfig(bool events, bool activities, int currentRunloopState) override;
 
   // Returns the number of tracked contexts for this device. The daemon has a
   // global view. If an unexpected error occurs, return -1.
diff --git a/libkineto/src/EventProfilerController.cpp b/libkineto/src/EventProfilerController.cpp
index 02da2d8..4b86793 100644
--- a/libkineto/src/EventProfilerController.cpp
+++ b/libkineto/src/EventProfilerController.cpp
@@ -284,6 +284,13 @@ void EventProfilerController::acceptConfig(const Config& config) {
   LOG(INFO) << "Received new on-demand config";
 }
 
+// Always reports -1, a value outside the 0..3 codes that
+// MuptiActivityProfiler::getCurrentRunloopState() assigns to RunloopState.
+int EventProfilerController::getCurrentRunloopState() {
+  VLOG(1) << "getCurrentRunloopState";
+  return -1;
+}
+
 bool EventProfilerController::enableForDevice(Config& cfg) {
   // FIXME: Use device unique id!
   if (!cfg.eventProfilerEnabledForDevice(profiler_->device())) {
diff --git a/libkineto/src/EventProfilerController.h b/libkineto/src/EventProfilerController.h
index 8c79c86..1db5b25 100644
--- a/libkineto/src/EventProfilerController.h
+++ b/libkineto/src/EventProfilerController.h
@@ -49,6 +49,8 @@ class EventProfilerController : public ConfigLoader::ConfigHandler {
 
   void acceptConfig(const Config& config) override;
 
+  int getCurrentRunloopState() override;
+
  private:
   explicit EventProfilerController(
       MUcontext context,
diff --git a/libkineto/src/IpcFabricConfigClient.cpp b/libkineto/src/IpcFabricConfigClient.cpp
index fc2970c..a58db15 100644
--- a/libkineto/src/IpcFabricConfigClient.cpp
+++ b/libkineto/src/IpcFabricConfigClient.cpp
@@ -143,7 +143,7 @@ std::string IpcFabricConfigClient::getLibkinetoBaseConfig() {
   return "";
 }
 
-std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type) {
+std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type, int currentRunloopState) {
   if (!ipcFabricEnabled_) {
     return "";
   }
@@ -157,6 +157,7 @@ std::string IpcFabricConfigClient::getLibkinetoOndemandConfig(int32_t type) {
   ::dynolog::ipcfabric::LibkinetoRequest* req = (::dynolog::ipcfabric::LibkinetoRequest*)malloc(sizeof(::dynolog::ipcfabric::LibkinetoRequest) + sizeof(int32_t) * size);
   req->type = type;
   req->n = size;
+  req->currentRunloopState = currentRunloopState;
   req->jobid = jobId_;
   for (int i = 0; i < size; i++) {
     req->pids[i] = pids_[i];
diff --git a/libkineto/src/IpcFabricConfigClient.h b/libkineto/src/IpcFabricConfigClient.h
index 0ee7c8c..55389e0 100644
--- a/libkineto/src/IpcFabricConfigClient.h
+++ b/libkineto/src/IpcFabricConfigClient.h
@@ -44,7 +44,7 @@ class IpcFabricConfigClient {
 
   // Get on demand configurations for tracing/counter collection
   // type is a bit mask, please see LibkinetoConfigType encoding above.
-  virtual std::string getLibkinetoOndemandConfig(int32_t type);
+  virtual std::string getLibkinetoOndemandConfig(int32_t type, int currentRunloopState);
 
   void setIpcFabricEnabled(bool enabled) {
     ipcFabricEnabled_ = enabled;
diff --git a/libkineto/src/MuptiActivityApi.cpp b/libkineto/src/MuptiActivityApi.cpp
index 6b41bcd..eed5925 100644
--- a/libkineto/src/MuptiActivityApi.cpp
+++ b/libkineto/src/MuptiActivityApi.cpp
@@ -140,6 +140,7 @@ void MuptiActivityApi::setDeviceBufferPoolLimit(size_t limit) {
 void MuptiActivityApi::forceLoadMupti() {
 #ifdef HAS_MUPTI
   MUPTI_CALL(muptiActivityEnable(MUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
+  MUPTI_CALL(muptiActivityDisable(MUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
 #endif
 }
 
@@ -163,7 +164,8 @@ void MuptiActivityApi::bufferRequested(
     uint8_t** buffer, size_t* size, size_t* maxNumRecords) {
   std::lock_guard guard(mutex_);
   if (allocatedGpuTraceBuffers_.size() >= maxGpuBufferCount_) {
-    stopCollection = true;
+    // Deliberately do not set stopCollection here, so that collection keeps
+    // running when the max GPU buffer count is reached.
     LOG(WARNING) << "Exceeded max GPU buffer count ("
                  << allocatedGpuTraceBuffers_.size()
                  << " > " << maxGpuBufferCount_
diff --git a/libkineto/src/MuptiActivityProfiler.cpp b/libkineto/src/MuptiActivityProfiler.cpp
index 5e585a8..15e3136 100644
--- a/libkineto/src/MuptiActivityProfiler.cpp
+++ b/libkineto/src/MuptiActivityProfiler.cpp
@@ -918,6 +918,29 @@ void MuptiActivityProfiler::startTraceInternal(
 void MuptiActivityProfiler::stopTraceInternal(
     const time_point& now) {
   captureWindowEndTime_ = libkineto::timeSinceEpoch(now);
+  // MT_TIMER_GPU_EVENTS: when set, the capture window end is pinned to a fixed
+  // length after the window start instead of the stop time computed above.
+  if (getenv("MT_TIMER_GPU_EVENTS") != nullptr) {
+    // Default window length is 60s to avoid too much replicated data.
+    int captureWindowLen = 60;
+    if (const char* windowLenEnv = getenv("MT_TIMER_CAPTURE_WINDOW_LEN")) {
+      int parsedLen = 0;
+      try {
+        parsedLen = std::stoi(windowLenEnv);
+      } catch (const std::exception&) {
+        // std::stoi throws std::invalid_argument or std::out_of_range; either
+        // way parsedLen stays 0 and the default is used below.
+      }
+      if (parsedLen > 0) {
+        captureWindowLen = parsedLen;
+      } else {
+        LOG(ERROR) << "Invalid value for MT_TIMER_CAPTURE_WINDOW_LEN. Using default 60s.\n";
+      }
+    }
+    // Widen before multiplying: 1000000 * int overflows for lengths > 2147s.
+    captureWindowEndTime_ =
+        captureWindowStartTime_ + int64_t{1000000} * captureWindowLen;
+  }
 #if defined(HAS_MUPTI) || defined(HAS_ROCTRACER)
   if (!cpuOnly_) {
     time_point timestamp;
diff --git a/libkineto/src/MuptiActivityProfiler.h b/libkineto/src/MuptiActivityProfiler.h
index 51a40d0..96283c3 100644
--- a/libkineto/src/MuptiActivityProfiler.h
+++ b/libkineto/src/MuptiActivityProfiler.h
@@ -116,6 +116,25 @@ class MuptiActivityProfiler {
     return currentRunloopState_ != RunloopState::WaitForRequest;
   }
 
+  // Maps the current RunloopState to an integer code:
+  // WaitForRequest=0, Warmup=1, CollectTrace=2, ProcessTrace=3.
+  // Returns -1 for any value not listed, so every path returns a value.
+  int getCurrentRunloopState() const {
+    switch (currentRunloopState_) {
+      case RunloopState::WaitForRequest:
+        return 0;
+      case RunloopState::Warmup:
+        return 1;
+      case RunloopState::CollectTrace:
+        return 2;
+      case RunloopState::ProcessTrace:
+        return 3;
+    }
+    // Not reached for the enumerators above; avoids flowing off the end of a
+    // non-void function (undefined behavior) if the value is ever out of range.
+    return -1;
+  }
+
   // Invoke at a regular interval to perform profiling activities.
   // When not active, an interval of 1-5 seconds is probably fine,
   // depending on required warm-up time and delayed start time.
diff --git a/libkineto/src/init.cpp b/libkineto/src/init.cpp
index 70da590..34ffa6e 100644
--- a/libkineto/src/init.cpp
+++ b/libkineto/src/init.cpp
@@ -166,7 +166,7 @@ void libkineto_init(bool cpuOnly, bool logOnError) {
     }
   }
 
-  if (shouldPreloadMuptiInstrumentation()) {
+  if (!cpuOnly && shouldPreloadMuptiInstrumentation()) {
     MuptiActivityApi::forceLoadMupti();
   }
 #endif // HAS_MUPTI
diff --git a/libkineto/third_party/dynolog b/libkineto/third_party/dynolog
index 2ab7d6a..f12705e 160000
--- a/libkineto/third_party/dynolog
+++ b/libkineto/third_party/dynolog
@@ -1 +1 @@
-Subproject commit 2ab7d6a3d5c051d8e4651aa2d31488145d9cbb4c
+Subproject commit f12705ed7c1bfc72bdc0f879ae00ea310b56ef09