Skip to content

Commit 30a0388

Browse files
Scott8440meta-codesync[bot]
authored andcommitted
Ship failure logs to Scuba
Summary: Added Scuba logging for Platform Manager exploration errors Build considerations: * OSS build is unaffected by using a no-op implementation in `oss/ScubaLogger.cpp` Memory usage: * Previous issues were encountered because including a logging library ended up causing OOMs. I ran a quick analysis of two PM runs one with Scuba and one without - saw minimal difference: ``` [root@fboss332610281.ash7 ~]# cat memory_a.log Peak RSS: 424584 KB [root@fboss332610281.ash7 ~]# cat memory_b.log Peak RSS: 428384 KB ``` * Log calls are wrapped in try/catch to prevent any failures from Scuba propagating back up and causing problems - Logs are sent to the `perfpipe_fboss_platform_manager_errors` Scuba category - Each error log includes: - Device hostname (from NetworkUtil::getLocalHost) - Platform name - Device path - Error type - Error message - Exploration status - Timestamp Reviewed By: RomanChoporov-Meta Differential Revision: D85386916 fbshipit-source-id: 3b67dd7563178f1ac35555621daf2432128f50f6
1 parent 6c6bfc2 commit 30a0388

File tree

8 files changed

+210
-42
lines changed

8 files changed

+210
-42
lines changed

cmake/PlatformPlatformManager.cmake

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@ target_link_libraries(platform_manager_device_path_resolver
140140
weutil_eeprom_contents_cpp2
141141
)
142142

143+
add_library(scuba_logger
144+
fboss/platform/platform_manager/oss/ScubaLogger.cpp
145+
)
146+
147+
target_link_libraries(scuba_logger
148+
fmt::fmt
149+
)
150+
143151
add_library(platform_manager_platform_explorer
144152
fboss/platform/platform_manager/PlatformExplorer.cpp
145153
fboss/platform/platform_manager/ExplorationSummary.cpp
@@ -159,6 +167,7 @@ target_link_libraries(platform_manager_platform_explorer
159167
weutil_fboss_eeprom_interface
160168
ioctl_smbus_eeprom_reader
161169
Folly::folly
170+
scuba_logger
162171
)
163172

164173
add_library(platform_manager_config_validator
@@ -222,6 +231,7 @@ target_link_libraries(platform_manager
222231
${SYSTEMD}
223232
gpiod_line
224233
range-v3
234+
scuba_logger
225235
)
226236

227237
install(TARGETS platform_manager)

fboss/platform/platform_manager/BUCK

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ cpp_library(
168168
":platform_manager_config-cpp2-types",
169169
":platform_manager_service-cpp2-types",
170170
":presence_checker",
171+
":scuba_logger",
171172
":utils",
172173
"//fb303:service_data",
173174
"//fboss/platform/helpers:platform_fs_utils",
@@ -179,6 +180,19 @@ cpp_library(
179180
],
180181
)
181182

183+
cpp_library(
184+
name = "scuba_logger",
185+
srcs = [
186+
"ScubaLogger.cpp",
187+
],
188+
exported_deps = [
189+
"//common/network:util",
190+
"//folly/json:dynamic",
191+
"//folly/logging:logging",
192+
"//scribe/client:scribe_client_no_sr",
193+
],
194+
)
195+
182196
cpp_library(
183197
name = "utils",
184198
srcs = [
@@ -253,6 +267,7 @@ cpp_binary(
253267
":config_utils",
254268
":pkg_manager",
255269
":platform_manager_handler",
270+
":scuba_logger",
256271
"//fb303:logging",
257272
"//fb303:service_data",
258273
"//fboss/platform/helpers:init",

fboss/platform/platform_manager/ExplorationSummary.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <folly/logging/xlog.h>
77
#include <re2/re2.h>
88

9+
#include "fboss/platform/platform_manager/ScubaLogger.h"
910
#include "fboss/platform/platform_manager/Utils.h"
1011

1112
namespace facebook::fboss::platform::platform_manager {
@@ -92,6 +93,25 @@ void ExplorationSummary::publishCounters(ExplorationStatus finalStatus) {
9293
}
9394
}
9495

96+
void ExplorationSummary::publishToScuba(ExplorationStatus finalStatus) {
97+
// Log individual errors
98+
for (const auto& [devicePath, explorationErrors] : devicePathToErrors_) {
99+
for (const auto& error : explorationErrors) {
100+
std::unordered_map<std::string, std::string> normals;
101+
102+
normals["platform"] = *platformConfig_.platformName();
103+
normals["device_path"] = devicePath;
104+
normals["event"] = *error.errorType();
105+
normals["error_message"] = *error.message();
106+
normals["exploration_status"] =
107+
apache::thrift::util::enumNameSafe(finalStatus);
108+
109+
ScubaLogger::log(normals);
110+
XLOG(INFO) << "Logged Platform Manager error to Scuba: " << devicePath;
111+
}
112+
}
113+
}
114+
95115
ExplorationStatus ExplorationSummary::summarize() {
96116
ExplorationStatus finalStatus = ExplorationStatus::FAILED;
97117
if (devicePathToErrors_.empty() && devicePathToExpectedErrors_.empty()) {
@@ -102,6 +122,7 @@ ExplorationStatus ExplorationSummary::summarize() {
102122
// Exploration summary reporting.
103123
print(finalStatus);
104124
publishCounters(finalStatus);
125+
publishToScuba(finalStatus);
105126
return finalStatus;
106127
}
107128

fboss/platform/platform_manager/ExplorationSummary.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,5 +124,6 @@ class ExplorationSummary {
124124

125125
void print(ExplorationStatus finalStatus);
126126
void publishCounters(ExplorationStatus finalStatus);
127+
void publishToScuba(ExplorationStatus finalStatus);
127128
};
128129
} // namespace facebook::fboss::platform::platform_manager

fboss/platform/platform_manager/Main.cpp

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "fboss/platform/platform_manager/ConfigUtils.h"
1313
#include "fboss/platform/platform_manager/PkgManager.h"
1414
#include "fboss/platform/platform_manager/PlatformManagerHandler.h"
15+
#include "fboss/platform/platform_manager/ScubaLogger.h"
1516

1617
using namespace facebook;
1718
using namespace facebook::fboss::platform;
@@ -49,54 +50,67 @@ int main(int argc, char** argv) {
4950
fb303::registerFollyLoggingOptionHandlers();
5051
helpers::init(&argc, &argv);
5152

52-
auto config = ConfigUtils().getConfig();
53+
try {
54+
auto config = ConfigUtils().getConfig();
55+
56+
PkgManager pkgManager(config);
57+
pkgManager.processAll();
58+
PlatformExplorer platformExplorer(config);
59+
platformExplorer.explore();
60+
if (FLAGS_run_once) {
61+
XLOG(INFO) << fmt::format(
62+
"Ran with --run_once={}. Skipping running as a daemon...",
63+
FLAGS_run_once);
64+
return 0;
65+
}
5366

54-
PkgManager pkgManager(config);
55-
pkgManager.processAll();
56-
PlatformExplorer platformExplorer(config);
57-
platformExplorer.explore();
58-
if (FLAGS_run_once) {
59-
XLOG(INFO) << fmt::format(
60-
"Ran with --run_once={}. Skipping running as a daemon...",
61-
FLAGS_run_once);
62-
return 0;
63-
}
67+
// When systemd starts PlatformManager, it sets the below env in PM
68+
// environment. This is a path to Unix domain socket at /run/systemd/notify.
69+
if (!FLAGS_run_in_netos) {
70+
const auto notifySocketEnv{"NOTIFY_SOCKET"};
71+
if (std::getenv(notifySocketEnv)) {
72+
sdNotifyReady();
73+
} else {
74+
XLOG(WARNING) << fmt::format(
75+
"Skipping sd_notify since ${} is not set which does not "
76+
"imply systemd execution.",
77+
notifySocketEnv);
78+
}
79+
}
6480

65-
// When systemd starts PlatformManager, it sets the below env in PM
66-
// environment. This is a path to Unix domain socket at /run/systemd/notify.
67-
if (!FLAGS_run_in_netos) {
68-
const auto notifySocketEnv{"NOTIFY_SOCKET"};
69-
if (std::getenv(notifySocketEnv)) {
70-
sdNotifyReady();
71-
} else {
72-
XLOG(WARNING) << fmt::format(
73-
"Skipping sd_notify since ${} is not set which does not "
74-
"imply systemd execution.",
75-
notifySocketEnv);
81+
std::optional<DataStore> ds = platformExplorer.getDataStore();
82+
if (!ds.has_value()) {
83+
XLOG(ERR)
84+
<< "PlatformExplorer did not succeed. Not starting Thrift Service.";
85+
return 1;
7686
}
77-
}
87+
XLOG(INFO) << "Running PlatformManager thrift service...";
7888

79-
std::optional<DataStore> ds = platformExplorer.getDataStore();
80-
if (!ds.has_value()) {
81-
XLOG(ERR)
82-
<< "PlatformExplorer did not succeed. Not starting Thrift Service.";
83-
return 1;
84-
}
85-
XLOG(INFO) << "Running PlatformManager thrift service...";
89+
auto server = std::make_shared<apache::thrift::ThriftServer>();
90+
auto handler = std::make_shared<PlatformManagerHandler>(
91+
platformExplorer, ds.value(), config);
92+
server->setPort(FLAGS_thrift_port);
93+
server->setInterface(handler);
94+
server->setAllowPlaintextOnLoopback(true);
95+
96+
auto evb = server->getEventBaseManager()->getEventBase();
97+
helpers::SignalHandler signalHandler(evb, server);
8698

87-
auto server = std::make_shared<apache::thrift::ThriftServer>();
88-
auto handler = std::make_shared<PlatformManagerHandler>(
89-
platformExplorer, ds.value(), config);
90-
server->setPort(FLAGS_thrift_port);
91-
server->setInterface(handler);
92-
server->setAllowPlaintextOnLoopback(true);
99+
helpers::runThriftService(
100+
server, handler, "PlatformManagerService", FLAGS_thrift_port);
93101

94-
auto evb = server->getEventBaseManager()->getEventBase();
95-
helpers::SignalHandler signalHandler(evb, server);
102+
XLOG(INFO) << "================ STOPPED PLATFORM BINARY ================";
103+
return 0;
104+
} catch (const std::exception& ex) {
105+
// Log unexpected crash to Scuba
106+
std::unordered_map<std::string, std::string> normals;
96107

97-
helpers::runThriftService(
98-
server, handler, "PlatformManagerService", FLAGS_thrift_port);
108+
normals["event"] = "unexpected_crash";
109+
normals["error_message"] = ex.what();
99110

100-
XLOG(INFO) << "================ STOPPED PLATFORM BINARY ================";
101-
return 0;
111+
ScubaLogger::log(normals);
112+
XLOG(FATAL) << "Platform Manager crashed with unexpected exception: "
113+
<< ex.what();
114+
return 1;
115+
}
102116
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
2+
3+
#include "fboss/platform/platform_manager/ScubaLogger.h"
4+
5+
#include <chrono>
6+
7+
#include <folly/json/dynamic.h>
8+
#include <folly/json/json.h>
9+
#include <folly/logging/xlog.h>
10+
#include "common/network/NetworkUtil.h"
11+
#include "scribe/client/ScribeClient.h"
12+
13+
namespace facebook::fboss::platform::platform_manager {
14+
namespace ScubaLogger {
15+
16+
void log(
17+
const std::unordered_map<std::string, std::string>& normals,
18+
const std::unordered_map<std::string, int64_t>& ints,
19+
const std::string& category) {
20+
folly::dynamic normalsObj = folly::dynamic::object;
21+
normalsObj["hostname"] = network::NetworkUtil::getLocalHost(true);
22+
for (const auto& [key, value] : normals) {
23+
normalsObj[key] = value;
24+
}
25+
26+
folly::dynamic intsObj = folly::dynamic::object;
27+
intsObj["time"] = std::chrono::duration_cast<std::chrono::seconds>(
28+
std::chrono::system_clock::now().time_since_epoch())
29+
.count();
30+
for (const auto& [key, value] : ints) {
31+
intsObj[key] = value;
32+
}
33+
34+
folly::dynamic scubaObj = folly::dynamic::object;
35+
scubaObj["normal"] = normalsObj;
36+
scubaObj["int"] = intsObj;
37+
38+
try {
39+
scribe::ScribeClient::getLite()->offer(category, folly::toJson(scubaObj));
40+
XLOG(DBG2) << "Successfully logged to Scuba (" << category
41+
<< "): " << folly::toPrettyJson(scubaObj);
42+
} catch (const std::exception& e) {
43+
XLOG(ERR) << "Exception: " << folly::exceptionStr(e)
44+
<< " while logging to Scuba (" << category
45+
<< "): " << folly::toPrettyJson(scubaObj);
46+
}
47+
}
48+
49+
} // namespace ScubaLogger
50+
} // namespace facebook::fboss::platform::platform_manager
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
2+
3+
#pragma once
4+
5+
#include <string>
6+
#include <unordered_map>
7+
8+
namespace facebook::fboss::platform::platform_manager {
9+
10+
/**
11+
* ScubaLogger provides utilities for logging data to Scuba.
12+
* In OSS builds, this is a no-op stub.
13+
* In internal builds, this logs data via ScribeClient.
14+
*/
15+
namespace ScubaLogger {
16+
17+
// Default Scuba category for Platform Manager logs
18+
constexpr const char* kDefaultCategory = "perfpipe_fboss_platform_manager";
19+
20+
/**
21+
* Log an event to Scuba with normal (string) and int fields.
22+
*
23+
* Note: The following fields are automatically added:
24+
* - "hostname" (string): The hostname from getLocalHost()
25+
* - "time" (int): Unix timestamp in seconds
26+
*
27+
* @param normals Map of string field names to string values
28+
* @param ints Map of string field names to int64_t values
29+
* @param category The Scuba category to log to
30+
*/
31+
void log(
32+
const std::unordered_map<std::string, std::string>& normals,
33+
const std::unordered_map<std::string, int64_t>& ints = {},
34+
const std::string& category = kDefaultCategory);
35+
36+
} // namespace ScubaLogger
37+
38+
} // namespace facebook::fboss::platform::platform_manager
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
2+
3+
#include "fboss/platform/platform_manager/ScubaLogger.h"
4+
5+
#include <folly/logging/xlog.h>
6+
7+
namespace facebook::fboss::platform::platform_manager {
8+
namespace ScubaLogger {
9+
10+
void log(
11+
const std::unordered_map<std::string, std::string>& normals,
12+
const std::unordered_map<std::string, int64_t>& ints,
13+
const std::string& category) {
14+
// No-op: Scuba logging is not available in OSS builds
15+
XLOG(DBG3) << "Scuba logging is not available in OSS builds. ";
16+
}
17+
18+
} // namespace ScubaLogger
19+
} // namespace facebook::fboss::platform::platform_manager

0 commit comments

Comments
 (0)