Skip to content

Commit 9be0e3c

Browse files
William-An and JRPan authored
add the same range filter in tracer tool to spinlock tool (#523)
Co-authored-by: JRPan <25518778+JRPan@users.noreply.github.com>
1 parent f0e4574 commit 9be0e3c

File tree

1 file changed

+178
-27
lines changed

1 file changed

+178
-27
lines changed

util/tracer_nvbit/others/spinlock_tool/spinlock_tool.cu

Lines changed: 178 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
#include <unordered_map>
5858
#include <unordered_set>
5959
#include <filesystem>
60+
#include <regex>
6061

6162
/* every tool needs to include this once */
6263
#include "nvbit_tool.h"
@@ -138,6 +139,117 @@ std::string spinlock_run_dir = "./";
138139
int spinlock_keep_intermediate_files = 0;
139140
void spinlock_check();
140141

142+
/* Kernel range filter */
143+
// TODO: consider moving this filter into a utility library shared by all
// tracer tools.
//
// Raw text of the DYNAMIC_KERNEL_RANGE setting. It is the variable handed to
// GET_VAR_STR(..., "DYNAMIC_KERNEL_RANGE", ...) in nvbit_at_init();
// parse_kernel_ranges_from_env() reads the environment variable itself via
// std::getenv() rather than reading this string.
144+
std::string kernel_ranges = "";
145+
146+
/* One entry of the DYNAMIC_KERNEL_RANGE filter: an inclusive span of kernel
 * IDs plus the kernel-name patterns that apply to that span. */
struct KernelRange {
  // First kernel ID covered by this entry (inclusive).
  uint64_t start = 0;
  // Last kernel ID covered by this entry (inclusive). A value of 0 marks the
  // range as open-ended: parse_kernel_ranges_from_env() stores 0 for "N-" and
  // for the trace-everything default, and should_trace_kernel() treats
  // end == 0 as "no upper bound".
  uint64_t end = 0;
  // A kernel matches this entry when its name fully matches ANY of these
  // patterns (std::regex_match semantics). An empty vector matches nothing.
  std::vector<std::regex> kernel_name_regexes;
};

// Filter entries built by parse_kernel_ranges_from_env() and scanned in order
// by should_trace_kernel().
std::vector<KernelRange> g_kernel_ranges;

// Largest explicit `end` among the parsed ranges (open-ended ranges contribute
// 0). It is reset and updated by parse_kernel_ranges_from_env().
uint64_t g_max_kernel_id = 0;
154+
void parse_kernel_ranges_from_env() {
155+
g_kernel_ranges.clear();
156+
g_max_kernel_id = 0;
157+
158+
const char *env_var = std::getenv("DYNAMIC_KERNEL_RANGE");
159+
if (!env_var || std::string(env_var).empty()) {
160+
g_kernel_ranges.push_back({0, 0, {std::regex(".*")}}); // 0 end = trace all
161+
return;
162+
}
163+
std::string input(env_var);
164+
std::istringstream stream(input);
165+
std::string token;
166+
167+
while (stream >> token) {
168+
if (token.empty())
169+
continue;
170+
171+
uint64_t start = 0, end = 0;
172+
std::vector<std::regex> regexes;
173+
174+
size_t at_pos = token.find('@');
175+
std::string range_part, regex_part;
176+
177+
if (at_pos != std::string::npos) {
178+
range_part = token.substr(0, at_pos);
179+
regex_part = token.substr(at_pos + 1);
180+
} else {
181+
range_part = token;
182+
}
183+
184+
// Parse the range
185+
if (!range_part.empty()) {
186+
size_t dash_pos = range_part.find('-');
187+
if (dash_pos != std::string::npos) {
188+
std::string start_str = range_part.substr(0, dash_pos);
189+
std::string end_str = range_part.substr(dash_pos + 1);
190+
191+
start = std::stoull(start_str);
192+
if (!end_str.empty()) {
193+
end = std::stoull(end_str);
194+
} else {
195+
end = 0; // open-ended
196+
}
197+
} else {
198+
start = std::stoull(range_part);
199+
end = start;
200+
}
201+
} else {
202+
// No range → match all IDs
203+
start = 0;
204+
end = 0;
205+
}
206+
207+
// Parse the regexes
208+
if (!regex_part.empty()) {
209+
std::istringstream regex_stream(regex_part);
210+
std::string regex_token;
211+
while (std::getline(regex_stream, regex_token, ',')) {
212+
try {
213+
regexes.emplace_back(regex_token);
214+
} catch (const std::regex_error &e) {
215+
std::cerr << "Invalid regex: " << regex_token << std::endl;
216+
}
217+
}
218+
} else {
219+
regexes.emplace_back(".*"); // match all kernel names
220+
}
221+
222+
g_kernel_ranges.push_back({start, end, regexes});
223+
if (end > g_max_kernel_id) {
224+
g_max_kernel_id = end;
225+
}
226+
}
227+
}
228+
229+
bool should_trace_kernel(uint64_t kernel_id, const std::string &kernel_name) {
230+
for (const auto &range : g_kernel_ranges) {
231+
// Check range for kernel ID
232+
if (range.end == 0) {
233+
if (kernel_id >= range.start) {
234+
// Match any of the regexes for this range
235+
for (const auto &regex : range.kernel_name_regexes) {
236+
if (std::regex_match(kernel_name, regex)) {
237+
return true;
238+
}
239+
}
240+
}
241+
} else if (kernel_id >= range.start && kernel_id <= range.end) {
242+
// Match any of the regexes for this range
243+
for (const auto &regex : range.kernel_name_regexes) {
244+
if (std::regex_match(kernel_name, regex)) {
245+
return true;
246+
}
247+
}
248+
}
249+
}
250+
return false;
251+
}
252+
141253
void* recv_thread_fun(void* args);
142254

143255
void nvbit_at_init() {
@@ -152,6 +264,16 @@ void nvbit_at_init() {
152264
GET_VAR_INT(spinlock_phase, "SPINLOCK_PHASE", 0, "Spinlock phase");
153265
GET_VAR_STR(spinlock_run_dir, "TRACES_FOLDER", "Spinlock detection base directory, use the same as the traces folder");
154266
GET_VAR_INT(spinlock_keep_intermediate_files, "SPINLOCK_KEEP_INTERMEDIATE_FILES", 0, "Keep intermediate files");
267+
GET_VAR_STR(
268+
kernel_ranges, "DYNAMIC_KERNEL_RANGE",
269+
"Specify kernel IDs or ranges to trace. Format:\n"
270+
" - Single ID: \"2\" traces only kernel 2.\n"
271+
" - Range: \"5-8\" traces kernels 5 through 8 (inclusive).\n"
272+
" - Open-ended: \"10-\" traces from kernel 10 onward.\n"
273+
" - Multiple ranges: \"2 5-8 10-\" (space-separated).\n"
274+
" - With regex: \"5-8@kernel_a.*,kernel_b.*\" traces kernels 5-8 "
275+
"with matching names.\n"
276+
"If unset or empty, all kernels will be traced from the beginning.");
155277
std::string pad(100, '-');
156278
printf("%s\n", pad.c_str());
157279

@@ -167,6 +289,9 @@ void nvbit_at_init() {
167289
if (!spinlock_run_dir.empty()) {
168290
spinlock_run_dir += "/";
169291
}
292+
293+
// Parse the kernel ranges
294+
parse_kernel_ranges_from_env();
170295
}
171296

172297
/**
@@ -179,6 +304,7 @@ void nvbit_at_init() {
179304
void nvbit_at_term() {
180305
// Read the spinlock_run_PHASE dir under ctx_<ctx_id> and for each unique kernel name,
181306
// we will have a vector of kernel histograms
307+
printf("Spinlock: Start to merge histograms from %s\n", spinlock_run_dir.c_str());
182308
using HistogramMapByName = std::map<std::string, std::vector<KernelInstructionHistogram*>>;
183309
HistogramMapByName map;
184310

@@ -192,6 +318,7 @@ void nvbit_at_term() {
192318

193319
// Now we iterate the spinlock_run_PHASE dir under ctx_<ctx_id> folder
194320
std::string context_run_dir = folder.path().string() + "/spinlock_run_" + std::to_string(spinlock_phase);
321+
DPRINTF("Spinlock: Read saved histograms from %s\n", context_run_dir.c_str());
195322

196323
// Build this histogram vector for this context
197324
for (auto& file : std::filesystem::directory_iterator(context_run_dir)) {
@@ -201,11 +328,15 @@ void nvbit_at_term() {
201328
map[histogram->name].push_back(histogram);
202329
}
203330
}
331+
332+
DPRINTF("Spinlock: Read %zu kernels from %s\n", map.size(), context_run_dir.c_str());
333+
204334
}
205335

206336
// Now, we merge all the histograms for each kernel name
207337
std::vector<KernelInstructionHistogram*> merged_histograms;
208338
size_t id = 0;
339+
DPRINTF("Spinlock: Start to merge histograms\n");
209340
for (auto& [kernel_name, histograms] : map) {
210341
KernelInstructionHistogram* merged_histogram = new KernelInstructionHistogram();
211342
// Set the name to the kernel name
@@ -218,6 +349,7 @@ void nvbit_at_term() {
218349
}
219350
merged_histograms.push_back(merged_histogram);
220351
}
352+
DPRINTF("Spinlock: Merged %zu kernels\n", merged_histograms.size());
221353

222354
// For each merged histogram, save under spinlock_run_PHASE_merged dir
223355
std::string merged_run_dir = spinlock_run_dir + "spinlock_detection/spinlock_run_" + std::to_string(spinlock_phase) + "_merged";
@@ -228,6 +360,7 @@ void nvbit_at_term() {
228360
assert(false);
229361
}
230362

363+
DPRINTF("Spinlock: Start to save merged histograms to %s\n", merged_run_dir.c_str());
231364
for (auto& histogram : merged_histograms) {
232365
histogram->saveToFile(merged_run_dir + "/kernel-" + std::to_string(histogram->id) + ".histogram");
233366
}
@@ -244,6 +377,7 @@ void nvbit_at_term() {
244377

245378
// Check for spinlock
246379
if (spinlock_phase == SPINLOCK_PHASE_CHECK) {
380+
DPRINTF("Spinlock: Start to check for spinlock\n");
247381
spinlock_check();
248382
}
249383
}
@@ -346,16 +480,25 @@ static void enter_kernel_launch(CUcontext ctx, CUfunction func,
346480
assert(cudaGetLastError() == cudaSuccess);
347481
}
348482

483+
// Plus 1 since tracer_tool use 1-based kernel id
484+
uint64_t kernel_id = grid_launch_id + 1;
485+
std::string mangled_func_name = std::string(nvbit_get_func_name(ctx, func, true));
486+
349487
// Initialize kernel instruction histogram map
350488
if (ctx_state->instr_histogram == nullptr) {
351-
ctx_state->instr_histogram = new KernelInstructionHistogram(grid_launch_id, nvbit_get_func_name(ctx, func, true));
489+
ctx_state->instr_histogram = new KernelInstructionHistogram(kernel_id, mangled_func_name);
352490
} else {
353-
ctx_state->instr_histogram->reinit(grid_launch_id, nvbit_get_func_name(ctx, func, true));
491+
ctx_state->instr_histogram->reinit(kernel_id, mangled_func_name);
354492
}
355493

356494
/* instrument */
357495
instrument_function_if_needed(ctx, func);
358496

497+
/* Determine if need to enable instrumentation */
498+
// Plus 1 since tracer_tool use 1-based kernel id
499+
bool enable_instrumentation = should_trace_kernel(kernel_id, mangled_func_name);
500+
bool disable_print = !enable_instrumentation;
501+
359502
int nregs = 0;
360503
CUDA_SAFECALL(
361504
cuFuncGetAttribute(&nregs, CU_FUNC_ATTRIBUTE_NUM_REGS, func));
@@ -379,29 +522,33 @@ static void enter_kernel_launch(CUcontext ctx, CUfunction func,
379522
if (cbid == API_CUDA_cuLaunchKernelEx_ptsz ||
380523
cbid == API_CUDA_cuLaunchKernelEx) {
381524
cuLaunchKernelEx_params* p = (cuLaunchKernelEx_params*)params;
382-
printf(
383-
"Spinlock: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - "
384-
"Kernel name %s - grid launch id %ld - grid size %d,%d,%d "
385-
"- block size %d,%d,%d - nregs %d - shmem %d - cuda stream "
386-
"id %ld\n",
387-
(uint64_t)ctx, pc, func_name, grid_launch_id,
388-
p->config->gridDimX, p->config->gridDimY,
389-
p->config->gridDimZ, p->config->blockDimX,
390-
p->config->blockDimY, p->config->blockDimZ, nregs,
391-
shmem_static_nbytes + p->config->sharedMemBytes,
392-
(uint64_t)p->config->hStream);
525+
if (!disable_print) {
526+
printf(
527+
"Spinlock: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - "
528+
"Kernel name %s - grid launch id %ld - grid size %d,%d,%d "
529+
"- block size %d,%d,%d - nregs %d - shmem %d - cuda stream "
530+
"id %ld\n",
531+
(uint64_t)ctx, pc, func_name, grid_launch_id,
532+
p->config->gridDimX, p->config->gridDimY,
533+
p->config->gridDimZ, p->config->blockDimX,
534+
p->config->blockDimY, p->config->blockDimZ, nregs,
535+
shmem_static_nbytes + p->config->sharedMemBytes,
536+
(uint64_t)p->config->hStream);
537+
}
393538
} else {
394539
cuLaunchKernel_params* p = (cuLaunchKernel_params*)params;
395-
printf(
396-
"Spinlock: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - "
397-
"Kernel name %s - grid launch id %ld - grid size %d,%d,%d "
398-
"- block size %d,%d,%d - nregs %d - shmem %d - cuda stream "
399-
"id %ld\n",
400-
(uint64_t)ctx, pc, func_name, grid_launch_id, p->gridDimX,
401-
p->gridDimY, p->gridDimZ, p->blockDimX, p->blockDimY,
402-
p->blockDimZ, nregs,
403-
shmem_static_nbytes + p->sharedMemBytes,
404-
(uint64_t)p->hStream);
540+
if (!disable_print) {
541+
printf(
542+
"Spinlock: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - "
543+
"Kernel name %s - grid launch id %ld - grid size %d,%d,%d "
544+
"- block size %d,%d,%d - nregs %d - shmem %d - cuda stream "
545+
"id %ld\n",
546+
(uint64_t)ctx, pc, func_name, grid_launch_id, p->gridDimX,
547+
p->gridDimY, p->gridDimZ, p->blockDimX, p->blockDimY,
548+
p->blockDimZ, nregs,
549+
shmem_static_nbytes + p->sharedMemBytes,
550+
(uint64_t)p->hStream);
551+
}
405552
}
406553

407554
// increment grid launch id for next launch
@@ -410,8 +557,7 @@ static void enter_kernel_launch(CUcontext ctx, CUfunction func,
410557
grid_launch_id++;
411558
}
412559

413-
/* enable instrumented code to run */
414-
nvbit_enable_instrumented(ctx, func, true);
560+
nvbit_enable_instrumented(ctx, func, enable_instrumentation);
415561

416562
// Reset the kernel receiving done flag for new kernel launch
417563
ctx_state->kernel_receiving_done = false;
@@ -450,8 +596,13 @@ static void leave_kernel_launch(CTXstate *ctx_state, uint64_t &grid_launch_id) {
450596
}
451597

452598
// Save the histogram to file in form of kernel-<kernel_id>.histogram
453-
bool success = ctx_state->instr_histogram->saveToFile( folder_name + "/" + "kernel-" + std::to_string(ctx_state->instr_histogram->id) + ".histogram");
454-
assert(success);
599+
// if we have specified to trace this kernel
600+
uint64_t kernel_id = ctx_state->instr_histogram->id;
601+
bool enable_save = should_trace_kernel(kernel_id, ctx_state->instr_histogram->name);
602+
if (enable_save) {
603+
bool success = ctx_state->instr_histogram->saveToFile( folder_name + "/" + "kernel-" + std::to_string(kernel_id) + ".histogram");
604+
assert(success);
605+
}
455606
}
456607

457608
void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid,

0 commit comments

Comments
 (0)