diff --git a/be/src/util/cpu_info.cpp b/be/src/util/cpu_info.cpp index dfc7739c8bfef..2c0a6ef271c98 100644 --- a/be/src/util/cpu_info.cpp +++ b/be/src/util/cpu_info.cpp @@ -92,6 +92,7 @@ int CpuInfo::max_num_numa_nodes_; std::unique_ptr CpuInfo::core_to_numa_node_; std::vector> CpuInfo::numa_node_to_cores_; std::vector CpuInfo::cpuset_cores_; +std::set CpuInfo::offline_cores_; std::vector CpuInfo::numa_node_core_idx_; static struct { @@ -161,6 +162,7 @@ void CpuInfo::init() { if (num_cores > 0) { num_cores_ = num_cores; } + _init_offline_cores(); _init_num_cores_with_cgroup(); if (num_cores_ <= 0) { num_cores_ = 1; @@ -229,6 +231,40 @@ void CpuInfo::_init_numa() { _init_numa_node_to_cores(); } +std::vector CpuInfo::parse_cpus(const std::string& cpus_str) { + std::vector cpuids; + std::vector fields = strings::Split(cpus_str, ",", strings::SkipWhitespace()); + for (const auto& field : fields) { + StringParser::ParseResult result; + if (field.find('-') == std::string::npos) { + auto cpu_id = StringParser::string_to_int(field.data(), field.size(), &result); + if (result == StringParser::PARSE_SUCCESS) { + cpuids.emplace_back(cpu_id); + } + continue; + } + + std::vector pair = strings::Split(field, "-", strings::SkipWhitespace()); + if (pair.size() != 2) { + continue; + } + std::string& start_str = pair[0]; + std::string& end_str = pair[1]; + auto start = StringParser::string_to_int(start_str.data(), start_str.size(), &result); + if (result != StringParser::PARSE_SUCCESS) { + continue; + } + auto end = StringParser::string_to_int(end_str.data(), end_str.size(), &result); + if (result != StringParser::PARSE_SUCCESS) { + continue; + } + for (int i = start; i <= end; i++) { + cpuids.emplace_back(i); + } + } + return cpuids; +} + void CpuInfo::_init_num_cores_with_cgroup() { bool running_in_docker = fs::path_exist("/.dockerenv"); if (!running_in_docker) { @@ -240,40 +276,6 @@ void CpuInfo::_init_num_cores_with_cgroup() { return; } - auto parse_cpusets = [](const std::string& cpuset_str) { - std::vector cpuids; - std::vector fields = strings::Split(cpuset_str, ",", strings::SkipWhitespace()); - for (const auto& field : fields) { - StringParser::ParseResult result; - if (field.find('-') == std::string::npos) { - auto cpu_id = StringParser::string_to_int(field.data(), field.size(), &result); - if (result == StringParser::PARSE_SUCCESS) { - cpuids.emplace_back(cpu_id); - } - continue; - } - - std::vector pair = strings::Split(field, "-", strings::SkipWhitespace()); - if (pair.size() != 2) { - continue; - } - std::string& start_str = pair[0]; - std::string& end_str = pair[1]; - auto start = StringParser::string_to_int(start_str.data(), start_str.size(), &result); - if (result != StringParser::PARSE_SUCCESS) { - continue; - } - auto end = StringParser::string_to_int(end_str.data(), end_str.size(), &result); - if (result != StringParser::PARSE_SUCCESS) { - continue; - } - for (int i = start; i <= end; i++) { - cpuids.emplace_back(i); - } - } - return cpuids; - }; - std::string cfs_period_us_str; std::string cfs_quota_us_str; std::string cpuset_str; @@ -323,7 +325,8 @@ void CpuInfo::_init_num_cores_with_cgroup() { int32_t cpuset_num_cores = num_cores_; if (!cpuset_str.empty() && std::any_of(cpuset_str.begin(), cpuset_str.end(), [](char c) { return !std::isspace(c); })) { - cpuset_cores_ = parse_cpusets(cpuset_str); + cpuset_cores_ = parse_cpus(cpuset_str); + std::erase_if(cpuset_cores_, [&](const size_t core) { return offline_cores_.contains(core); }); cpuset_num_cores = cpuset_cores_.size(); is_cgroup_with_cpuset_ = true; } @@ -346,6 +349,17 @@ void CpuInfo::_init_numa_node_to_cores() { } } +void CpuInfo::_init_offline_cores() { + offline_cores_.clear(); + std::string offline_cores_str; + if (!FileUtil::read_whole_content("/sys/devices/system/cpu/offline", offline_cores_str)) { + return; + } + + std::vector offline_cores = parse_cpus(offline_cores_str); + offline_cores_.insert(offline_cores.begin(), offline_cores.end()); +} + int CpuInfo::get_current_core() { // sched_getcpu() is not supported on some old kernels/glibcs (like the versions that // shipped with CentOS 5). In that case just pretend we're always running on CPU 0 @@ -429,18 +443,42 @@ std::string CpuInfo::debug_string() { stream << " " << core << "->" << core_to_numa_node_[core] << " |"; } stream << std::endl; + + auto print_cores = [&stream](const std::string& title, const auto& cores) { + stream << " " << title << ": "; + if (cores.empty()) { + stream << "None"; + } else { + bool is_first = true; + for (const int core : cores) { + if (!is_first) { + stream << ","; + } + is_first = false; + stream << core; + } + } + stream << std::endl; + }; + + print_cores("Cores from CGroup CPUSET", cpuset_cores_); + print_cores("Offline Cores", offline_cores_); + return stream.str(); } std::vector CpuInfo::get_core_ids() { + std::vector core_ids; if (!cpuset_cores_.empty()) { - return cpuset_cores_; + core_ids = cpuset_cores_; + } else { + for (const auto& core_ids_of_node : numa_node_to_cores_) { + core_ids.insert(core_ids.end(), core_ids_of_node.begin(), core_ids_of_node.end()); + } } - std::vector core_ids; - for (const auto& core_ids_of_node : numa_node_to_cores_) { - core_ids.insert(core_ids.end(), core_ids_of_node.begin(), core_ids_of_node.end()); - } + std::erase_if(core_ids, [&](const size_t core) { return offline_cores_.contains(core); }); + return core_ids; } diff --git a/be/src/util/cpu_info.h b/be/src/util/cpu_info.h index e11081345f023..160263634ba3e 100644 --- a/be/src/util/cpu_info.h +++ b/be/src/util/cpu_info.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -87,6 +88,9 @@ class CpuInfo { static bool is_cgroup_with_cpuset() { return is_cgroup_with_cpuset_; } static bool is_cgroup_with_cpu_quota() { return is_cgroup_with_cpu_quota_; } + /// Parse a string-formatted cpus in the format "0-3,5,7-9" and return the parsed core IDs. + static std::vector parse_cpus(const std::string& cpus_str); + private: /// Initialize NUMA-related state - called from Init(); static void _init_numa(); @@ -98,6 +102,9 @@ class CpuInfo { /// 'core_to_numa_node_'. Called from InitNuma(); static void _init_numa_node_to_cores(); + /// Initialize 'core_to_numa_node_' from `/sys/devices/system/cpu/offline`. + static void _init_offline_cores(); + /// Populates the arguments with information about this machine's caches. /// The values returned are not reliable in some environments, e.g. RHEL5 on EC2, so /// so we will keep this as a private method. @@ -123,6 +130,7 @@ class CpuInfo { /// belonging to that NUMA node. static std::vector> numa_node_to_cores_; static std::vector cpuset_cores_; + static std::set offline_cores_; /// Array with 'max_num_cores_' entries, each of which is the index of that core in its /// NUMA node. diff --git a/be/test/gutil/cpu_test.cc b/be/test/gutil/cpu_test.cc index 1e45d7d7f237b..5b6276ce0921e 100644 --- a/be/test/gutil/cpu_test.cc +++ b/be/test/gutil/cpu_test.cc @@ -16,6 +16,8 @@ #include +#include "util/cpu_info.h" + namespace starrocks { TEST(CpuInfoTest, hardware_support) { @@ -44,4 +46,32 @@ TEST(CpuInfoTest, hardware_support) { #endif } +TEST(CpuInfoTest, parse_cpus) { + auto assert_cpu_equals = [](std::vector& cpus, std::vector& expected_cpus) { + ASSERT_EQ(expected_cpus.size(), cpus.size()); + std::ranges::sort(cpus); + std::ranges::sort(expected_cpus); + for (size_t i = 0; i < cpus.size(); ++i) { + EXPECT_EQ(expected_cpus[i], cpus[i]); + } + }; + + { + std::vector cpus = CpuInfo::parse_cpus("0-3,5,7,9-10"); + std::vector expected_cpus = {0, 1, 2, 3, 5, 7, 9, 10}; + assert_cpu_equals(cpus, expected_cpus); + } + + { + const std::vector cpus = CpuInfo::parse_cpus(""); + EXPECT_TRUE(cpus.empty()); + } + + { + std::vector cpus = CpuInfo::parse_cpus("abc,1-,2-abc,3-5,,8"); + std::vector expected_cpus = {3, 4, 5, 8}; + assert_cpu_equals(cpus, expected_cpus); + } +} + } // namespace starrocks