
Commit 5bdc811

Merge branch '2.18' into backport-11759-to-2.18
2 parents: 15a89f5 + c0bb6bd

26 files changed: +515 −118 lines

Diff for: ddtrace/contrib/internal/asgi/middleware.py

+13 −9

@@ -91,6 +91,18 @@ async def _blocked_asgi_app(scope, receive, send):
     await send({"type": "http.response.body", "body": b""})


+def _parse_response_cookies(response_headers):
+    cookies = {}
+    try:
+        result = response_headers.get("set-cookie", "").split("=", maxsplit=1)
+        if len(result) == 2:
+            cookie_key, cookie_value = result
+            cookies[cookie_key] = cookie_value
+    except Exception:
+        log.debug("failed to extract response cookies", exc_info=True)
+    return cookies
+
+
 class TraceMiddleware:
     """
     ASGI application middleware that traces the requests.
@@ -211,7 +223,6 @@ async def __call__(self, scope, receive, send):
                 peer_ip = client[0]
             else:
                 peer_ip = None
-
             trace_utils.set_http_meta(
                 span,
                 self.integration_config,
@@ -234,15 +245,8 @@ async def wrapped_send(message):
             except Exception:
                 log.warning("failed to extract response headers", exc_info=True)
                 response_headers = None
-
             if span and message.get("type") == "http.response.start" and "status" in message:
-                cookies = {}
-                try:
-                    cookie_key, cookie_value = response_headers.get("set-cookie", "").split("=", maxsplit=1)
-                    cookies[cookie_key] = cookie_value
-                except Exception:
-                    log.debug("failed to extract response cookies", exc_info=True)
-
+                cookies = _parse_response_cookies(response_headers)
                 status_code = message["status"]
                 trace_utils.set_http_meta(
                     span,

Diff for: ddtrace/contrib/internal/celery/app.py

−11

@@ -133,10 +133,6 @@ def _traced_apply_async_inner(func, instance, args, kwargs):
             if task_span:
                 task_span.set_exc_info(*sys.exc_info())

-            prerun_span = core.get_item("prerun_span")
-            if prerun_span:
-                prerun_span.set_exc_info(*sys.exc_info())
-
             raise
         finally:
             task_span = core.get_item("task_span")
@@ -147,11 +143,4 @@ def _traced_apply_async_inner(func, instance, args, kwargs):
                 )
                 task_span.finish()

-            prerun_span = core.get_item("prerun_span")
-            if prerun_span:
-                log.debug(
-                    "The task_postrun signal was not called, so manually closing span: %s", prerun_span._pprint()
-                )
-                prerun_span.finish()
-
     return _traced_apply_async_inner

Diff for: ddtrace/contrib/internal/celery/signals.py

−3

@@ -54,9 +54,6 @@ def trace_prerun(*args, **kwargs):
     service = config.celery["worker_service_name"]
     span = pin.tracer.trace(c.WORKER_ROOT_SPAN, service=service, resource=task.name, span_type=SpanTypes.WORKER)

-    # Store an item called "prerun span" in case task_postrun doesn't get called
-    core.set_item("prerun_span", span)
-
     # set span.kind to the type of request being performed
     span.set_tag_str(SPAN_KIND, SpanKind.CONSUMER)

Diff for: ddtrace/internal/datadog/profiling/dd_wrapper/include/uploader_builder.hpp

+2

@@ -43,6 +43,8 @@ class UploaderBuilder
     static void set_output_filename(std::string_view _output_filename);

     static std::variant<Uploader, std::string> build();
+
+    static void postfork_child();
 };

 } // namespace Datadog

Diff for: ddtrace/internal/datadog/profiling/dd_wrapper/src/code_provenance.cpp

+1 −2

@@ -14,9 +14,8 @@ namespace Datadog {
 void
 Datadog::CodeProvenance::postfork_child()
 {
-    get_instance().mtx.~mutex(); // Destroy the mutex
+    // NB placement-new to re-init and leak the mutex because doing anything else is UB
     new (&get_instance().mtx) std::mutex(); // Recreate the mutex
-    get_instance().reset(); // Reset the state
 }

 void
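The `// NB placement-new ...` comment above captures the pattern this commit applies across the profiling sources: after `fork()`, a `std::mutex` that may still be held by a parent thread that does not exist in the child can neither be unlocked nor destroyed safely, so a fresh mutex is constructed in place over the old storage and the old object is deliberately leaked. A minimal sketch of the idea follows; the `pthread_atfork` registration and the names `g_lock` / `reinit_locks_in_child` are illustrative assumptions, not how ddtrace wires its own hooks.

// Illustrative sketch (names are hypothetical): re-initialize a global mutex
// in the fork child with placement-new, mirroring the pattern in these diffs.
#include <mutex>
#include <new>
#include <pthread.h>

static std::mutex g_lock;

static void reinit_locks_in_child()
{
    // The old mutex may be "locked" by a thread that was not copied into the
    // child; unlocking or destroying it is undefined behavior, so leak it and
    // construct a fresh mutex in the same storage.
    new (&g_lock) std::mutex();
}

static void register_fork_handlers()
{
    // Run reinit_locks_in_child() in the child after every fork().
    pthread_atfork(/*prepare=*/nullptr, /*parent=*/nullptr, reinit_locks_in_child);
}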

Diff for: ddtrace/internal/datadog/profiling/dd_wrapper/src/ddup_interface.cpp

+1

@@ -24,6 +24,7 @@ ddup_postfork_child()
     Datadog::Uploader::postfork_child();
     Datadog::SampleManager::postfork_child();
     Datadog::CodeProvenance::postfork_child();
+    Datadog::UploaderBuilder::postfork_child();
 }

 void

Diff for: ddtrace/internal/datadog/profiling/dd_wrapper/src/profile.cpp

+1 −1

@@ -186,6 +186,6 @@ Datadog::Profile::collect(const ddog_prof_Sample& sample, int64_t endtime_ns)
 void
 Datadog::Profile::postfork_child()
 {
-    profile_mtx.unlock();
+    new (&profile_mtx) std::mutex();
     cycle_buffers();
 }

Diff for: ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader.cpp

+2 −1

@@ -175,5 +175,6 @@ Datadog::Uploader::postfork_parent()
 void
 Datadog::Uploader::postfork_child()
 {
-    unlock();
+    // NB placement-new to re-init and leak the mutex because doing anything else is UB
+    new (&upload_lock) std::mutex();
 }

Diff for: ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader_builder.cpp

+7

@@ -186,3 +186,10 @@ Datadog::UploaderBuilder::build()

     return Datadog::Uploader{ output_filename, ddog_exporter };
 }
+
+void
+Datadog::UploaderBuilder::postfork_child()
+{
+    // NB placement-new to re-init and leak the mutex because doing anything else is UB
+    new (&tag_mutex) std::mutex();
+}

Diff for: ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp

+5

@@ -67,6 +67,11 @@ _stack_v2_atfork_child()
     // so we don't even reveal this function to the user
     _set_pid(getpid());
     ThreadSpanLinks::postfork_child();
+
+    // `thread_info_map_lock` and `task_link_map_lock` are global locks held in echion
+    // NB placement-new to re-init and leak the mutex because doing anything else is UB
+    new (&thread_info_map_lock) std::mutex;
+    new (&task_link_map_lock) std::mutex;
 }

 __attribute__((constructor)) void
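To make the motivation for these atfork handlers concrete, here is a small, hypothetical demonstration (not part of the commit) of what happens to a lock held at `fork()` time: in the child the owning thread no longer exists, so the mutex stays locked until it is re-initialized in place. POSIX only guarantees async-signal-safe calls in the child of a multithreaded process, so treat this strictly as an illustration.

// Hypothetical demo: a mutex held across fork() stays locked in the child.
// Build with: g++ -std=c++17 -pthread demo.cpp
#include <cstdio>
#include <mutex>
#include <new>
#include <thread>
#include <sys/wait.h>
#include <unistd.h>

static std::mutex g_lock;

int main()
{
    std::thread holder([] {
        std::lock_guard<std::mutex> guard(g_lock); // hold the lock across the fork
        sleep(2);
    });
    sleep(1); // let the holder acquire the lock first

    pid_t pid = fork();
    if (pid == 0) {
        // Child: the holder thread was not duplicated, yet the lock is still taken.
        std::printf("before re-init: %s\n", g_lock.try_lock() ? "acquired" : "still held");
        new (&g_lock) std::mutex(); // the placement-new trick used in the diffs above
        std::printf("after re-init:  %s\n", g_lock.try_lock() ? "acquired" : "still held");
        _exit(0);
    }
    holder.join();
    waitpid(pid, nullptr, 0);
    return 0;
}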

Diff for: ddtrace/internal/datadog/profiling/stack_v2/src/thread_span_links.cpp

+1 −3

@@ -53,10 +53,8 @@ ThreadSpanLinks::reset()
 void
 ThreadSpanLinks::postfork_child()
 {
-    // Explicitly destroy and reconstruct the mutex to avoid undefined behavior
-    get_instance().mtx.~mutex();
+    // NB placement-new to re-init and leak the mutex because doing anything else is UB
     new (&get_instance().mtx) std::mutex();
-
     get_instance().reset();
 }

Diff for: ddtrace/profiling/collector/_memalloc.c

+84 −41

@@ -42,47 +42,95 @@ static PyObject* object_string = NULL;

 #define ALLOC_TRACKER_MAX_COUNT UINT64_MAX

+// The data coordination primitives in this and related files are related to a crash we started seeing.
+// We don't have a precise understanding of the causal factors within the runtime that lead to this condition,
+// since the GIL alone was sufficient in the past for preventing this issue.
+// We add an option here to _add_ a crash, in order to observe this condition in a future diagnostic iteration.
+// **This option is _intended_ to crash the Python process** do not use without a good reason!
+static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMALLOC_CRASH_ON_MUTEX_PASS";
+static const char* g_truthy_values[] = { "1", "true", "yes", "on", "enable", "enabled", NULL }; // NB the sentinel NULL
+static memlock_t g_memalloc_lock;
+
 static alloc_tracker_t* global_alloc_tracker;

+// This is a multiplatform way to define an operation to happen at static initialization time
+static void
+memalloc_init(void);
+
+#ifdef _MSC_VER
+#pragma section(".CRT$XCU", read)
+__declspec(allocate(".CRT$XCU")) void (*memalloc_init_func)(void) = memalloc_init;
+
+#elif defined(__GNUC__) || defined(__clang__)
+__attribute__((constructor))
+#else
+#error Unsupported compiler
+#endif
+static void
+memalloc_init()
+{
+    // Check if we should crash the process on mutex pass
+    char* crash_on_mutex_pass_str = getenv(g_crash_on_mutex_pass_str);
+    bool crash_on_mutex_pass = false;
+    if (crash_on_mutex_pass_str) {
+        for (int i = 0; g_truthy_values[i]; i++) {
+            if (strcmp(crash_on_mutex_pass_str, g_truthy_values[i]) == 0) {
+                crash_on_mutex_pass = true;
+                break;
+            }
+        }
+    }
+    memlock_init(&g_memalloc_lock, crash_on_mutex_pass);
+}
+
 static void
 memalloc_add_event(memalloc_context_t* ctx, void* ptr, size_t size)
 {
-    /* Do not overflow; just ignore the new events if we ever reach that point */
-    if (global_alloc_tracker->alloc_count >= ALLOC_TRACKER_MAX_COUNT)
+    uint64_t alloc_count = atomic_add_clamped(&global_alloc_tracker->alloc_count, 1, ALLOC_TRACKER_MAX_COUNT);
+
+    /* Return if we've reached the maximum number of allocations */
+    if (alloc_count == 0)
         return;

-    global_alloc_tracker->alloc_count++;
+    // Return if we can't take the guard
+    if (!memalloc_take_guard()) {
+        return;
+    }

-    /* Avoid loops */
-    if (memalloc_get_reentrant())
+    // In this implementation, the `global_alloc_tracker` isn't intrinsically protected. Before we read or modify,
+    // take the lock. The count of allocations is already forward-attributed elsewhere, so if we can't take the lock
+    // there's nothing to do.
+    if (!memlock_trylock(&g_memalloc_lock)) {
         return;
+    }

     /* Determine if we can capture or if we need to sample */
     if (global_alloc_tracker->allocs.count < ctx->max_events) {
-        /* set a barrier so we don't loop as getting a traceback allocates memory */
-        memalloc_set_reentrant(true);
         /* Buffer is not full, fill it */
         traceback_t* tb = memalloc_get_traceback(ctx->max_nframe, ptr, size, ctx->domain);
-        memalloc_set_reentrant(false);
-        if (tb)
+        if (tb) {
             traceback_array_append(&global_alloc_tracker->allocs, tb);
+        }
     } else {
         /* Sampling mode using a reservoir sampling algorithm: replace a random
          * traceback with this one */
-        uint64_t r = random_range(global_alloc_tracker->alloc_count);
+        uint64_t r = random_range(alloc_count);

-        if (r < ctx->max_events) {
-            /* set a barrier so we don't loop as getting a traceback allocates memory */
-            memalloc_set_reentrant(true);
+        // In addition to event size, need to check that the tab is in a good state
+        if (r < ctx->max_events && global_alloc_tracker->allocs.tab != NULL) {
             /* Replace a random traceback with this one */
             traceback_t* tb = memalloc_get_traceback(ctx->max_nframe, ptr, size, ctx->domain);
-            memalloc_set_reentrant(false);
+
+            // Need to check not only that the tb returned
             if (tb) {
                 traceback_free(global_alloc_tracker->allocs.tab[r]);
                 global_alloc_tracker->allocs.tab[r] = tb;
             }
         }
     }
+
+    memlock_unlock(&g_memalloc_lock);
+    memalloc_yield_guard();
 }

 static void
@@ -98,12 +146,6 @@ memalloc_free(void* ctx, void* ptr)
     alloc->free(alloc->ctx, ptr);
 }

-#ifdef _PY37_AND_LATER
-Py_tss_t memalloc_reentrant_key = Py_tss_NEEDS_INIT;
-#else
-int memalloc_reentrant_key = -1;
-#endif
-
 static void*
 memalloc_alloc(int use_calloc, void* ctx, size_t nelem, size_t elsize)
 {
@@ -233,7 +275,10 @@ memalloc_start(PyObject* Py_UNUSED(module), PyObject* args)

     global_memalloc_ctx.domain = PYMEM_DOMAIN_OBJ;

-    global_alloc_tracker = alloc_tracker_new();
+    if (memlock_trylock(&g_memalloc_lock)) {
+        global_alloc_tracker = alloc_tracker_new();
+        memlock_unlock(&g_memalloc_lock);
+    }

     PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &global_memalloc_ctx.pymem_allocator_obj);
     PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
@@ -258,8 +303,11 @@ memalloc_stop(PyObject* Py_UNUSED(module), PyObject* Py_UNUSED(args))

     PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &global_memalloc_ctx.pymem_allocator_obj);
     memalloc_tb_deinit();
-    alloc_tracker_free(global_alloc_tracker);
-    global_alloc_tracker = NULL;
+    if (memlock_trylock(&g_memalloc_lock)) {
+        alloc_tracker_free(global_alloc_tracker);
+        global_alloc_tracker = NULL;
+        memlock_unlock(&g_memalloc_lock);
+    }

     memalloc_heap_tracker_deinit();

@@ -310,9 +358,15 @@ iterevents_new(PyTypeObject* type, PyObject* Py_UNUSED(args), PyObject* Py_UNUSE
     if (!iestate)
         return NULL;

-    iestate->alloc_tracker = global_alloc_tracker;
     /* reset the current traceback list */
-    global_alloc_tracker = alloc_tracker_new();
+    if (memlock_trylock(&g_memalloc_lock)) {
+        iestate->alloc_tracker = global_alloc_tracker;
+        global_alloc_tracker = alloc_tracker_new();
+        memlock_unlock(&g_memalloc_lock);
+    } else {
+        Py_TYPE(iestate)->tp_free(iestate);
+        return NULL;
+    }
     iestate->seq_index = 0;

     PyObject* iter_and_count = PyTuple_New(3);
@@ -326,8 +380,11 @@ iterevents_new(PyTypeObject* type, PyObject* Py_UNUSED(args), PyObject* Py_UNUSE
 static void
 iterevents_dealloc(IterEventsState* iestate)
 {
-    alloc_tracker_free(iestate->alloc_tracker);
-    Py_TYPE(iestate)->tp_free(iestate);
+    if (memlock_trylock(&g_memalloc_lock)) {
+        alloc_tracker_free(iestate->alloc_tracker);
+        Py_TYPE(iestate)->tp_free(iestate);
+        memlock_unlock(&g_memalloc_lock);
+    }
 }

 static PyObject*
@@ -442,20 +499,6 @@ PyInit__memalloc(void)
         return NULL;
     }

-#ifdef _PY37_AND_LATER
-    if (PyThread_tss_create(&memalloc_reentrant_key) != 0) {
-#else
-    memalloc_reentrant_key = PyThread_create_key();
-    if (memalloc_reentrant_key == -1) {
-#endif
-#ifdef MS_WINDOWS
-        PyErr_SetFromWindowsErr(0);
-#else
-        PyErr_SetFromErrno(PyExc_OSError);
-#endif
-        return NULL;
-    }
-
     if (PyType_Ready(&MemallocIterEvents_Type) < 0)
         return NULL;
     Py_INCREF((PyObject*)&MemallocIterEvents_Type);
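A note on the new `atomic_add_clamped()` call: its implementation is not shown in this diff, but the call site implies it increments the counter atomically and returns the new value, or 0 once the clamp would be exceeded (which is why `alloc_count == 0` means "stop tracking"). A plausible sketch under that assumption, written with C++ atomics rather than whatever ddtrace actually uses:

// Hypothetical sketch of a clamped atomic increment with the semantics implied
// by the call site above; not the ddtrace implementation. Assumes amount <= clamp.
#include <atomic>
#include <cstdint>

static uint64_t
atomic_add_clamped(std::atomic<uint64_t>* target, uint64_t amount, uint64_t clamp)
{
    uint64_t old_val = target->load(std::memory_order_relaxed);
    while (true) {
        if (old_val > clamp - amount) {
            return 0; // adding would exceed the clamp; leave the counter as-is
        }
        uint64_t new_val = old_val + amount;
        // On failure, compare_exchange_weak reloads old_val, so simply retry.
        if (target->compare_exchange_weak(old_val, new_val, std::memory_order_relaxed)) {
            return new_val;
        }
    }
}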
