diff --git a/dynolog/src/ServiceHandler.cpp b/dynolog/src/ServiceHandler.cpp
index c6e2fbb..f441af9 100644
--- a/dynolog/src/ServiceHandler.cpp
+++ b/dynolog/src/ServiceHandler.cpp
@@ -9,6 +9,9 @@ namespace dynolog {
 int ServiceHandler::getStatus() {
+  if (dcgm_) { // when dcgm_ is set, report its RPC status instead of the constant 1 below
+    return dcgm_->getRpcStatus();
+  }
   return 1;
 }
diff --git a/dynolog/src/gpumon/DcgmGroupInfo.cpp b/dynolog/src/gpumon/DcgmGroupInfo.cpp
index 6754faa..2787f74 100644
--- a/dynolog/src/gpumon/DcgmGroupInfo.cpp
+++ b/dynolog/src/gpumon/DcgmGroupInfo.cpp
@@ -324,6 +324,7 @@ void DcgmGroupInfo::update() {
         }
       }
       metricsInt["dcgm_error"] = blank_value_field ? 1 : 0;
+      rpcStatus_ = blank_value_field ? 0 : 1; // inverse of dcgm_error: 0 = blank value field, 1 = OK. NOTE(review): rewritten each time this statement runs, so only the last write is visible to getRpcStatus() - confirm that is intended if several entities are processed per update()
       metricsMapDouble_[entity.m_entityId] = metricsDouble;
       metricsMapInt_[entity.m_entityId] = metricsInt;
     }
diff --git a/dynolog/src/gpumon/DcgmGroupInfo.h b/dynolog/src/gpumon/DcgmGroupInfo.h
index 8345363..9647add 100644
--- a/dynolog/src/gpumon/DcgmGroupInfo.h
+++ b/dynolog/src/gpumon/DcgmGroupInfo.h
@@ -36,6 +36,9 @@ class DcgmGroupInfo {
   void log(Logger& logger);
   bool pauseProfiling(int duration_s);
   bool resumeProfiling();
+  int getRpcStatus() const { // returns the value last stored in rpcStatus_ (1 until update() writes 0)
+    return rpcStatus_; // NOTE(review): plain int read with no visible synchronization - confirm update() and callers of this getter cannot run concurrently
+  }
  private:
   DcgmGroupInfo(
@@ -49,6 +52,7 @@ class DcgmGroupInfo {
   void watchProfFields(const std::vector& prof_fields);
   std::vector gpuIdList_;
+  int rpcStatus_ = 1; // Defaults to 1; update() writes 0 when blank_value_field is set (DCGM_INT64_BLANK detected, per the original note) and 1 otherwise
   int deviceCount_ = 0;
   bool profEnabled_ = false;
   int updateIntervalMs_;