Skip to content

Commit ddd2f1b

Browse files
abhiShedgefacebook-github-bot
authored andcommitted
Add a new monitor for procfs interrupts (#373)
Summary: We are following the newly established monitor-publisher pattern of reading and exposing the perf metrics. The logic to process interrupt data will be moved to a separate monitor class of type MonitorBase. What to expect in upcoming diffs? - Unit test for the new monitor class - Integration with the Server.cpp - Cleanup of the old code Differential Revision: D74662495
1 parent a8d535e commit ddd2f1b

File tree

2 files changed

+204
-0
lines changed

2 files changed

+204
-0
lines changed
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#include "dynolog/src/procfs/parser/InterruptStatsMonitor.h"
2+
#include <filesystem>
3+
#include <fstream>
4+
5+
namespace facebook {
6+
namespace dynolog {
7+
8+
// File-local shorthand for the monitor's nested Granularity enum, used by the
// member definitions below.
using Granularity = InterruptStatsMonitor::Granularity;
9+
10+
// Builds a monitor driven by `ticker` that reads its data from
// `rootDir` + "/proc/interrupts". The configured processor count is queried
// once here; interruptsRefresh() later compares it with the number of
// per-CPU values found on each parsed row.
InterruptStatsMonitor::InterruptStatsMonitor(
    std::shared_ptr<TTicker> ticker,
    const std::string& rootDir)
    : MonitorBase<TTicker>(std::move(ticker), "InterruptStatsMonitor", {}),
      rootDir_(rootDir),
      cpuCount_(static_cast<int>(sysconf(_SC_NPROCESSORS_CONF))) {}
17+
18+
// Factory for tests: creates a monitor with a freshly default-constructed
// ticker that reads from `rootDir` + "/proc/interrupts".
InterruptStatsMonitor InterruptStatsMonitor::makeTestClass(
    const std::string& rootDir) {
  // The monitor is constructed directly in the return statement.
  return InterruptStatsMonitor(std::make_shared<TTicker>(), rootDir);
}
24+
25+
// Returns a copy of the snapshot stored by the most recent major tick.
InterruptStats InterruptStatsMonitor::getInterruptStatsPerMinute() {
  // tick() overwrites the snapshot while holding dataLock_ exclusively, so the
  // read has to hold the lock too (shared is enough) to avoid a data race.
  std::shared_lock lock(dataLock_);
  return statsAtMinuteTick_;
}
28+
29+
// Returns a copy of the snapshot stored by the most recent minor tick.
InterruptStats InterruptStatsMonitor::getInterruptStatsPerSecond() {
  // tick() overwrites the snapshot while holding dataLock_ exclusively, so the
  // read has to hold the lock too (shared is enough) to avoid a data race.
  std::shared_lock lock(dataLock_);
  return statsAtSecondTick_;
}
32+
33+
// Returns the stored snapshot that corresponds to `granularity`.
// Throws std::runtime_error when `granularity` is neither MINUTE nor SECOND.
InterruptStats InterruptStatsMonitor::getInterruptStats(
    Granularity granularity) {
  switch (granularity) {
    case Granularity::MINUTE:
      return getInterruptStatsPerMinute();
    case Granularity::SECOND:
      return getInterruptStatsPerSecond();
  }
  // Reached only for a value outside the enumerators handled above.
  throw std::runtime_error(
      "InterruptStatsMonitor doesn't support requested granularity");
}
44+
45+
// Ticker callback. A major tick refreshes the per-minute snapshot; otherwise a
// minor tick refreshes the per-second snapshot. Any other mask is ignored.
// The exclusive lock is held for the whole call, including the file read.
//
// NOTE(review): interruptsRefresh() keeps a single set of *Prev counters in
// `stats`, so both snapshots appear to hold the delta since the most recent
// refresh of either kind — confirm this is the intended meaning.
void InterruptStatsMonitor::tick(TMask mask) {
  std::unique_lock guard(dataLock_);

  const bool majorTick = TTicker::is_major_tick(mask);
  if (!majorTick && !TTicker::is_minor_tick(mask)) {
    return;
  }

  // A major tick takes precedence: only the per-minute snapshot is updated.
  InterruptStats& snapshot =
      majorTick ? statsAtMinuteTick_ : statsAtSecondTick_;
  snapshot = interruptsRefresh();
}
55+
56+
namespace {

// Sums every whitespace-separated token of `line` that consists solely of the
// ASCII digits 0-9. `valueCount` receives the number of tokens that were
// summed. std::stoll may throw (e.g. std::out_of_range); the caller handles it.
int64_t sumNumericColumns(const std::string& line, size_t& valueCount) {
  std::istringstream tokens(line);
  std::string word;
  int64_t total = 0;
  valueCount = 0;
  while (tokens >> word) {
    if (word.find_first_not_of("0123456789") == std::string::npos) {
      total += std::stoll(word);
      ++valueCount;
    }
  }
  return total;
}

// Returns `current - previous`. A negative difference (the summed counters
// went backwards) is raised by the smallest multiple of UINT32_MAX that makes
// it non-negative.
int64_t deltaSincePrevious(int64_t current, int64_t previous) {
  int64_t delta = current - previous;
  if (delta < 0) {
    delta += ((-delta - 1) / UINT32_MAX + 1) * UINT32_MAX;
  }
  return delta;
}

} // namespace

// Re-reads `rootDir_` + "/proc/interrupts", updates the member `stats` and
// returns a copy of it.
//
//  - stats.tlbshootdowns: change of the summed "TLB shootdowns" row since the
//    previous successful parse of that row; left untouched when the row is
//    missing or its value count differs from cpuCount_.
//  - stats.eth0Intrps: reset to 0 on every call, then set to the change of the
//    summed "eth0-" / "mlx5_comp" rows when their sum is non-zero.
//
// Problems (missing file, open failure, exceptions while reading/parsing) are
// logged and the current `stats` is returned; nothing is thrown to the caller.
InterruptStats InterruptStatsMonitor::interruptsRefresh() {
  const std::string fullPath = rootDir_ + "/proc/interrupts";

  int64_t eth0IntrpsSum = 0;
  stats.eth0Intrps = 0;

  try {
    if (!std::filesystem::exists(fullPath)) {
      LOG(ERROR) << "Path " << fullPath << " does not exist";
      return stats;
    }
    std::ifstream file(fullPath);
    if (!file.is_open()) {
      LOG(ERROR) << "Failed to open the file " << fullPath;
      return stats;
    }

    // Every row carries one value per CPU, so its length grows with the core
    // count. Reading into a std::string has no length cap; istream::getline
    // into a fixed-size buffer sets failbit on an over-long row, which would
    // end this loop before the rows of interest are reached.
    std::string line;
    while (std::getline(file, line)) {
      // Expected row format, one value per core (count must match cpuCount_):
      // TLB xxxx xxxx xxxx .... xxxx xxxx TLB shootdowns
      if (line.find("TLB shootdowns") != std::string::npos) {
        size_t valueCount = 0;
        const int64_t tlbshootdowns = sumNumericColumns(line, valueCount);
        if (valueCount == static_cast<size_t>(cpuCount_)) {
          stats.tlbshootdowns =
              deltaSincePrevious(tlbshootdowns, stats.tlbshootdownsPrev);
          stats.tlbshootdownsPrev = tlbshootdowns;
        } else {
          LOG(ERROR) << "CPU count from procfs interrupts: " << valueCount
                     << " Expected: " << cpuCount_;
        }
      }

      // Expected row format, one value per core (count must match cpuCount_):
      // IRQ#: xxxx xxxx xxxx .... xxxx xxxx PCI-MSI-edge eth0-#
      // In some kernel versions the PCI-MSI-edge part may be separated by a
      // space.
      if (line.find("eth0-") != std::string::npos ||
          line.find("mlx5_comp") != std::string::npos) {
        size_t valueCount = 0;
        const int64_t eth0IntrpRow = sumNumericColumns(line, valueCount);
        if (valueCount == static_cast<size_t>(cpuCount_)) {
          eth0IntrpsSum += eth0IntrpRow;
        } else {
          LOG(WARNING) << "CPU count from procfs interrupts: " << valueCount
                       << " Expected: " << cpuCount_;
        }
      }
    }

    // A zero sum leaves eth0Intrps at 0 and eth0IntrpsPrev unchanged.
    if (eth0IntrpsSum) {
      stats.eth0Intrps =
          deltaSincePrevious(eth0IntrpsSum, stats.eth0IntrpsPrev);
      stats.eth0IntrpsPrev = eth0IntrpsSum;
    }
    // `file` is closed by its destructor.
  } catch (const std::exception& e) {
    LOG(ERROR) << "Error in reading the procfs interrupts file: " << fullPath
               << " Error: " << e.what();
  }
  return stats;
}
146+
147+
} // namespace dynolog
148+
} // namespace facebook
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
2+
3+
#pragma once
4+
5+
#include <dynolog/src/metric_frame/MetricFrame.h>
6+
#include <gtest/gtest_prod.h>
7+
#include <memory>
8+
#include <shared_mutex>
9+
#include "dynolog/src/MonitorBase.h"
10+
#include "dynolog/src/Ticker.h"
11+
12+
namespace facebook {
13+
namespace dynolog {
14+
15+
// Interrupt counters produced by InterruptStatsMonitor from /proc/interrupts.
// All fields start at zero so a default-constructed instance is well defined.
struct InterruptStats {
  // Change of the summed "TLB shootdowns" row between the last two refreshes.
  int64_t tlbshootdowns{0};
  // Summed "TLB shootdowns" row as seen by the last refresh that parsed it.
  int64_t tlbshootdownsPrev{0};
  // Change of the summed "eth0-" / "mlx5_comp" rows between refreshes.
  int64_t eth0Intrps{0};
  // Summed "eth0-" / "mlx5_comp" rows recorded by the last refresh that
  // found a non-zero sum.
  int64_t eth0IntrpsPrev{0};
};
19+
20+
// Monitor that parses <rootDir>/proc/interrupts whenever its ticker fires and
// keeps two snapshots: one written on major ticks (exposed as
// Granularity::MINUTE) and one written on minor ticks (Granularity::SECOND).
// NOTE(review): the meaning of the Ticker<60000, 1000, 1, 2> template
// arguments is not visible here — presumably a 60 s major / 1 s minor period;
// confirm against Ticker.h.
class InterruptStatsMonitor : MonitorBase<Ticker<60000, 1000, 1, 2>> {
 private:
  // Prefix prepended to "/proc/interrupts".
  std::string const rootDir_;
  // Processor count obtained in the constructor; each parsed row must contain
  // exactly this many numeric values to be accepted.
  int16_t cpuCount_;
  // Guards the snapshots written by tick().
  std::shared_mutex dataLock_;
  // Working state of interruptsRefresh(), including the previous raw totals.
  InterruptStats stats{};
  // Value-initialized so that getInterruptStats() returns zeros, not
  // indeterminate values, when called before the first matching tick.
  InterruptStats statsAtMinuteTick_{};
  InterruptStats statsAtSecondTick_{};

  // Re-reads the interrupts file, updates `stats` and returns a copy of it.
  InterruptStats interruptsRefresh();
  InterruptStats getInterruptStatsPerMinute();
  InterruptStats getInterruptStatsPerSecond();

  // NOTE(review): no definition of this overload is visible in the
  // accompanying .cpp — confirm it is defined or still needed.
  explicit InterruptStatsMonitor(const std::string& rootDir = "");

 public:
  using TTicker = Ticker<60000, 1000, 1, 2>;
  using typename MonitorBase<TTicker>::TMask;
  enum class Granularity { MINUTE, SECOND };

  // Ticker callback; refreshes the snapshot that matches `mask`.
  void tick(TMask mask) override;
  // Returns the stored snapshot for `granularity`; throws std::runtime_error
  // for an unsupported value.
  InterruptStats getInterruptStats(Granularity granularity);

  explicit InterruptStatsMonitor(
      std::shared_ptr<TTicker> ticker,
      const std::string& rootDir = "");

  // Creates a monitor with its own default-constructed ticker, for tests.
  static InterruptStatsMonitor makeTestClass(const std::string& rootDir);

  FRIEND_TEST(InterruptStatsMonitorTest, testInterruptsRefresh);
  FRIEND_TEST(InterruptStatsMonitorTest, testInterruptsRefreshInvalidPath);
  FRIEND_TEST(
      InterruptStatsMonitorTest,
      testInterruptsRefreshIncorrectCoreCount);
};
53+
54+
} // namespace dynolog
55+
56+
} // namespace facebook

0 commit comments

Comments
 (0)