Skip to content

Commit b593e13

Browse files
mdh1418noahfalk
andauthored
[EventPipe] Remove thread lock and cleanup ThreadSessionState lifecycle (#118415)
Fixes #111368 This PR removes EventPipe's complex dual-lock synchronization scheme and integrates ThreadSessionState cleanup as part of event reading logic. ### Thread Lock Removal - Eliminated the EventPipe thread lock, replacing the complex dual-lock scheme (thread lock + buffer_manager lock) with a single buffer_manager lock - Unified synchronization: Thread list and session_state slot updates now occur atomically under one lock, eliminating race conditions between buffer operations and session state management - Simplified lock-free reads: Removed complex cross-lock coordination requirements ### ThreadSessionState Lifecycle Cleanup - Fixed memory leaks: Proper cleanup of ThreadSessionState objects prevents unbounded growth in long-running sessions - Unified resource management: ThreadSessionState and buffer_list now have matching lifetimes, eliminating buffer_list leaks from the previous cleanup logic - Eliminated TSS leak scenarios when threads exit before session disable - Atomic cleanup operations: Single-lock model enables proper cleanup without cross-lock race conditions - SequencePoints are updated when a ThreadSessionState is being removed ### Slight Event Reading Optimization - Added snapshotting of known EventPipeThreadSessionStates to prevent iterating over Threads whose buffers are guaranteed to not have the earliest events ## Testing Using the original repro ``` Throwing and catching 1000 exceptions (warm up JIT) Throwing and catching 5000000 exceptions RSS: 51.7MiB, user CPU: 100.4% 20000000 EventPipe events received, 5000000 exceptions thrown Spawning 5000 short-lived threads RSS: 60.3MiB, user CPU: 119.0% 20000 EventPipe events received, 5000 exceptions thrown Throwing and catching 5000000 exceptions RSS: 54.3MiB, user CPU: 101.5% 20000000 EventPipe events received, 5000000 exceptions thrown ``` ## Performance Adapted https://github.com/dotnet/diagnostics/tree/main/src/tests/EventPipeStress to also stress the native in-proc event listener EventPipeSession. Testing a matrix of Session Type X Thread Count X Exceptions/Thread Session Type: IPC Streaming, Native In-Proc Thread Count: 1, 8, 16, 50, 100, 1000 Exceptions/Thread: 1, 2, 100, 1000 Note: The local stress test that I put together primarily tests an immediate burst in events being written to EventPipe, which doesn't reflect the behavior of any steady state behavior. ### Native InProc Baseline vs PR <img width="2021" height="531" alt="Screenshot 2026-01-14 221654" src="https://github.com/user-attachments/assets/3bc35c38-fe60-4db0-8210-8d2bdb599dfa" /> This PR overall performs better in fewer events dropped, especially considering the high thread x high exceptions/thread scenarios. ### IPC Streaming Baseline VS PR <img width="2010" height="533" alt="Screenshot 2026-01-14 222304" src="https://github.com/user-attachments/assets/3dc347d8-ab8d-434a-9c42-7ad842f162dc" /> This PR performs similarly, with slight drop (0.8%) in events read for high threads x high exceptions/thread. --------- Co-authored-by: Noah Falk <[email protected]>
1 parent 92d46b9 commit b593e13

File tree

11 files changed

+601
-687
lines changed

11 files changed

+601
-687
lines changed

src/native/eventpipe/ep-block.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -906,10 +906,10 @@ ep_sequence_point_block_init (
906906
const uint32_t thread_count = dn_umap_size (map);
907907
ep_write_buffer_uint32_t (&sequence_point_block->block.write_pointer, thread_count);
908908

909-
DN_UMAP_FOREACH_BEGIN (const EventPipeThreadSessionState *, key, uint32_t, sequence_number, map) {
909+
DN_UMAP_FOREACH_BEGIN (EventPipeThread *, thread, uint32_t, sequence_number, map) {
910910
ep_write_buffer_uint64_t (
911911
&sequence_point_block->block.write_pointer,
912-
ep_thread_get_os_thread_id (ep_thread_session_state_get_thread (key)));
912+
ep_thread_get_os_thread_id (thread));
913913

914914
ep_write_buffer_uint32_t (
915915
&sequence_point_block->block.write_pointer,

src/native/eventpipe/ep-buffer-manager.c

Lines changed: 393 additions & 409 deletions
Large diffs are not rendered by default.

src/native/eventpipe/ep-buffer-manager.h

Lines changed: 16 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ struct _EventPipeBufferList {
2121
#else
2222
struct _EventPipeBufferList_Internal {
2323
#endif
24-
// The thread which writes to the buffers in this list.
25-
EventPipeThreadHolder thread_holder;
2624
// The buffer manager that owns this list.
2725
EventPipeBufferManager *manager;
2826
// Buffers are stored in an intrusive linked-list from oldest to newest.
@@ -31,9 +29,6 @@ struct _EventPipeBufferList_Internal {
3129
EventPipeBuffer *tail_buffer;
3230
// The number of buffers in the list.
3331
uint32_t buffer_count;
34-
// The sequence number of the last event that was read, only
35-
// updated/read by the reader thread.
36-
uint32_t last_read_sequence_number;
3732
};
3833

3934
#if !defined(EP_INLINE_GETTER_SETTER) && !defined(EP_IMPL_BUFFER_MANAGER_GETTER_SETTER)
@@ -42,26 +37,13 @@ struct _EventPipeBufferList {
4237
};
4338
#endif
4439

45-
EP_DEFINE_GETTER_REF(EventPipeBufferList *, buffer_list, EventPipeThreadHolder *, thread_holder);
46-
47-
static
48-
inline
49-
EventPipeThread *
50-
ep_buffer_list_get_thread (EventPipeBufferList *buffer_list)
51-
{
52-
return ep_thread_holder_get_thread (ep_buffer_list_get_thread_holder_cref (buffer_list));
53-
}
54-
5540
EventPipeBufferList *
56-
ep_buffer_list_alloc (
57-
EventPipeBufferManager *manager,
58-
EventPipeThread *thread);
41+
ep_buffer_list_alloc (EventPipeBufferManager *manager);
5942

6043
EventPipeBufferList *
6144
ep_buffer_list_init (
6245
EventPipeBufferList *buffer_list,
63-
EventPipeBufferManager *manager,
64-
EventPipeThread *thread);
46+
EventPipeBufferManager *manager);
6547

6648
void
6749
ep_buffer_list_fini (EventPipeBufferList *buffer_list);
@@ -108,11 +90,22 @@ struct _EventPipeBufferManager_Internal {
10890
ep_rt_spin_lock_handle_t rt_lock;
10991
// The session this buffer manager belongs to.
11092
EventPipeSession *session;
111-
// Iterator state for reader thread.
93+
// ---------------------------- READER-THREAD ONLY BEGIN ----------------------------
11294
// These are not protected by rt_lock and expected to only be used on the reader thread.
95+
// Iterator state for reader thread.
11396
EventPipeEventInstance *current_event;
11497
EventPipeBuffer *current_buffer;
115-
EventPipeBufferList *current_buffer_list;
98+
EventPipeThreadSessionState *current_thread_session_state;
99+
// A snapshot of the thead_session_state_list only used by the reader thread to optimize event iteration.
100+
// As existing and new threads write events, this represents the subset of EventPipeThreadSessionStates
101+
// containing events before the snapshot timestamp. As buffer_manager_move_next_event_any_snapshot_thread
102+
// and buffer_manager_move_next_event_same_thread iterate over events, entries are removed from the list
103+
// when no more events are available before the snapshot timestamp. So once there are no more events
104+
// before the snapshot timestamp, the list will be empty.
105+
dn_list_t *thread_session_state_list_snapshot;
106+
// The timestamp representing the cut-off for the snapshot list.
107+
ep_timestamp_t snapshot_timestamp;
108+
// ---------------------------- READER-THREAD ONLY END ------------------------------
116109
// The total allocation size of buffers under management.
117110
volatile size_t size_of_all_buffers;
118111
// The maximum allowable size of buffers under management.
@@ -190,20 +183,6 @@ ep_buffer_manager_write_event (
190183
ep_rt_thread_handle_t event_thread,
191184
EventPipeStackContents *stack);
192185

193-
// READ_ONLY state and no new EventPipeBuffers or EventPipeBufferLists can be created. Calls to
194-
// write_event that start during the suspension period or were in progress but hadn't yet recorded
195-
// their event into a buffer before the start of the suspension period will return false and the
196-
// event will not be recorded. Any events that not recorded as a result of this suspension will be
197-
// treated the same as events that were not recorded due to configuration.
198-
// EXPECTED USAGE: First the caller will disable all events via configuration, then call
199-
// suspend_write_event () to force any write_event calls that may still be in progress to either
200-
// finish or cancel. After that all BufferLists and Buffers can be safely drained and/or deleted.
201-
// _Requires_lock_held (ep)
202-
void
203-
ep_buffer_manager_suspend_write_event (
204-
EventPipeBufferManager *buffer_manager,
205-
uint32_t session_index);
206-
207186
// Write the contents of the managed buffers to the specified file.
208187
// The stop_timeStamp is used to determine when tracing was stopped to ensure that we
209188
// skip any events that might be partially written due to races when tracing is stopped.
@@ -236,7 +215,7 @@ ep_buffer_manager_get_next_event (EventPipeBufferManager *buffer_manager);
236215
// threads can be in the middle of a write operation and get blocked, and we may not get an opportunity
237216
// to free their buffer for a very long time.
238217
void
239-
ep_buffer_manager_deallocate_buffers (EventPipeBufferManager *buffer_manager);
218+
ep_buffer_manager_close (EventPipeBufferManager *buffer_manager);
240219

241220
#ifdef EP_CHECKED_BUILD
242221
bool

src/native/eventpipe/ep-buffer.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ ep_buffer_free (EventPipeBuffer *buffer)
5656
{
5757
ep_return_void_if_nok (buffer != NULL);
5858

59-
// We should never be deleting a buffer that a writer thread might still try to write to
60-
EP_ASSERT (ep_rt_volatile_load_uint32_t (&buffer->state) == (uint32_t)EP_BUFFER_STATE_READ_ONLY);
59+
// buffers are not guaranteed to be read-only at this point as allocation may have failed,
60+
// a Listener EventPipe Session may not have finished reading all events by disable,
61+
// or an EventPipe session was abruptly disconnected.
6162

6263
ep_rt_vfree (buffer->buffer, buffer->limit - buffer->buffer);
6364
ep_rt_object_free (buffer);
@@ -191,8 +192,6 @@ ep_buffer_convert_to_read_only (EventPipeBuffer *buffer)
191192
EP_ASSERT (buffer != NULL);
192193
EP_ASSERT (buffer->current_read_event == NULL);
193194

194-
ep_thread_requires_lock_held (buffer->writer_thread);
195-
196195
ep_rt_volatile_store_uint32_t (&buffer->state, (uint32_t)EP_BUFFER_STATE_READ_ONLY);
197196

198197
// If this buffer contains an event, select it.

src/native/eventpipe/ep-buffer.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
// isn't writable and read-related methods will assert if it isn't readable. Methods that have no asserts should have immutable results that
2525
// can be used at any point during the buffer's lifetime. The buffer has no internal locks so it is the caller's responsibility to synchronize
2626
// their usage.
27-
// Writing into the buffer and calling convert_to_read_only() is always done with EventPipeThread rt_lock held. The eventual reader thread can do
27+
// Calling convert_to_read_only() is always done with EventPipeBufferManager lock held, and it yields to concurrent writes. The eventual reader thread can do
2828
// a few different things to ensure it sees a consistent state:
29-
// 1) Take the writer's EventPipeThread rt_lock at least once after the last time the writer writes events
29+
// 1) Take the buffer manager's lock at least once after the last time the writer writes events
3030
// 2) Use a memory barrier that prevents reader loads from being re-ordered earlier, such as the one that will occur implicitly by evaluating
3131
// ep_buffer_get_volatile_state ()
3232

@@ -57,7 +57,7 @@ struct _EventPipeBuffer_Internal {
5757
// The linked list is invasive, thus we declare the pointers here.
5858
EventPipeBuffer *prev_buffer;
5959
EventPipeBuffer *next_buffer;
60-
// State transition WRITABLE -> READ_ONLY only occurs while holding the writer_thread->rt_lock;
60+
// State transition WRITABLE -> READ_ONLY only occurs while holding the buffer_manager lock;
6161
// It can be read at any time
6262
volatile uint32_t state;
6363
// The sequence number corresponding to current_read_event
@@ -74,7 +74,6 @@ struct _EventPipeBuffer {
7474
EP_DEFINE_GETTER(EventPipeBuffer *, buffer, ep_timestamp_t, creation_timestamp)
7575
EP_DEFINE_GETTER(EventPipeBuffer *, buffer, uint8_t *, buffer)
7676
EP_DEFINE_GETTER(EventPipeBuffer *, buffer, uint8_t *, limit)
77-
EP_DEFINE_GETTER_REF(EventPipeBuffer *, buffer, volatile uint32_t *, state)
7877
EP_DEFINE_GETTER(EventPipeBuffer *, buffer, EventPipeBuffer *, prev_buffer)
7978
EP_DEFINE_SETTER(EventPipeBuffer *, buffer, EventPipeBuffer *, prev_buffer)
8079
EP_DEFINE_GETTER(EventPipeBuffer *, buffer, EventPipeBuffer *, next_buffer)
@@ -146,7 +145,7 @@ EventPipeBufferState
146145
ep_buffer_get_volatile_state (const EventPipeBuffer *buffer);
147146

148147
// Convert the buffer writable to readable.
149-
// _Requires_lock_held (thread)
148+
// Should only be called while holding the buffer manager lock.
150149
void
151150
ep_buffer_convert_to_read_only (EventPipeBuffer *buffer);
152151

src/native/eventpipe/ep-event-instance.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,8 @@ sequence_point_fini (EventPipeSequencePoint *sequence_point)
231231

232232
// Each entry in the map owns a ref-count on the corresponding thread
233233
if (dn_umap_size (sequence_point->thread_sequence_numbers) != 0) {
234-
DN_UMAP_FOREACH_KEY_BEGIN (EventPipeThreadSessionState *, key, sequence_point->thread_sequence_numbers) {
235-
ep_thread_release (ep_thread_session_state_get_thread (key));
234+
DN_UMAP_FOREACH_KEY_BEGIN (EventPipeThread *, key, sequence_point->thread_sequence_numbers) {
235+
ep_thread_release (key);
236236
} DN_UMAP_FOREACH_END;
237237
}
238238

src/native/eventpipe/ep-session.c

Lines changed: 26 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,6 @@ static
4040
void
4141
session_create_streaming_thread (EventPipeSession *session);
4242

43-
static
44-
void
45-
ep_session_remove_dangling_session_states (EventPipeSession *session);
46-
4743
static
4844
bool
4945
session_user_events_tracepoints_init (
@@ -380,48 +376,6 @@ ep_session_alloc (
380376
ep_exit_error_handler ();
381377
}
382378

383-
void
384-
ep_session_remove_dangling_session_states (EventPipeSession *session)
385-
{
386-
ep_return_void_if_nok (session != NULL);
387-
388-
DN_DEFAULT_LOCAL_ALLOCATOR (allocator, dn_vector_ptr_default_local_allocator_byte_size);
389-
390-
dn_vector_ptr_custom_init_params_t params = {0, };
391-
params.allocator = (dn_allocator_t *)&allocator;
392-
params.capacity = dn_vector_ptr_default_local_allocator_capacity_size;
393-
394-
dn_vector_ptr_t threads;
395-
396-
if (dn_vector_ptr_custom_init (&threads, &params)) {
397-
ep_thread_get_threads (&threads);
398-
DN_VECTOR_PTR_FOREACH_BEGIN (EventPipeThread *, thread, &threads) {
399-
if (thread) {
400-
EP_SPIN_LOCK_ENTER (ep_thread_get_rt_lock_ref (thread), section1);
401-
EventPipeThreadSessionState *session_state = ep_thread_get_session_state(thread, session);
402-
if (session_state) {
403-
// If a buffer tries to write event(s) but never gets a buffer because the maximum total buffer size
404-
// has been exceeded, we can leak the EventPipeThreadSessionState* and crash later trying to access
405-
// the session from the thread session state. Whenever we terminate a session we check to make sure
406-
// we haven't leaked any thread session states.
407-
ep_thread_delete_session_state(thread, session);
408-
}
409-
EP_SPIN_LOCK_EXIT (ep_thread_get_rt_lock_ref (thread), section1);
410-
411-
ep_thread_release (thread);
412-
}
413-
} DN_VECTOR_PTR_FOREACH_END;
414-
415-
dn_vector_ptr_dispose (&threads);
416-
}
417-
418-
ep_on_exit:
419-
return;
420-
421-
ep_on_error:
422-
ep_exit_error_handler ();
423-
}
424-
425379
void
426380
ep_session_inc_ref (EventPipeSession *session)
427381
{
@@ -445,11 +399,18 @@ ep_session_dec_ref (EventPipeSession *session)
445399
ep_buffer_manager_free (session->buffer_manager);
446400
ep_file_free (session->file);
447401

448-
ep_session_remove_dangling_session_states (session);
449-
450402
ep_rt_object_free (session);
451403
}
452404

405+
void
406+
ep_session_close (EventPipeSession *session)
407+
{
408+
EP_ASSERT (session != NULL);
409+
410+
if (ep_session_type_uses_buffer_manager (session->session_type))
411+
ep_buffer_manager_close (session->buffer_manager);
412+
}
413+
453414
EventPipeSessionProvider *
454415
ep_session_get_session_provider (
455416
const EventPipeSession *session,
@@ -524,14 +485,23 @@ ep_session_execute_rundown (
524485
ep_rt_execute_rundown (execution_checkpoints);
525486
}
526487

488+
// Coordinates an EventPipeSession being freed in disable_holding_lock with
489+
// threads operating on session pointer slots in the _ep_sessions session array.
490+
// It assumes 1) callers have already cleared the session pointer slot from the
491+
// _ep_sessions so all threads that already loaded the session pointer slot will
492+
// be properly awaited and 2) the config lock is held to prevent a new session
493+
// being allocated and inheriting the session pointer slot index, which would
494+
// cause threads to mistakenly operate on the wrong session.
527495
void
528-
ep_session_suspend_write_event (EventPipeSession *session)
496+
ep_session_wait_for_inflight_thread_ops (EventPipeSession *session)
529497
{
530498
EP_ASSERT (session != NULL);
531499

532500
// Need to disable the session before calling this method.
533501
EP_ASSERT (!ep_is_session_enabled ((EventPipeSessionID)session));
534502

503+
ep_requires_lock_held ();
504+
535505
DN_DEFAULT_LOCAL_ALLOCATOR (allocator, dn_vector_ptr_default_local_allocator_byte_size);
536506

537507
dn_vector_ptr_custom_init_params_t params = {0, };
@@ -543,9 +513,13 @@ ep_session_suspend_write_event (EventPipeSession *session)
543513
if (dn_vector_ptr_custom_init (&threads, &params)) {
544514
ep_thread_get_threads (&threads);
545515
DN_VECTOR_PTR_FOREACH_BEGIN (EventPipeThread *, thread, &threads) {
516+
// The session slot must remain cleared from the _ep_sessions array from holding the config lock.
517+
// Otherwise, if the config lock is removed, a newly allocated session could inherit the same slot
518+
// and cause operating threads to mistake the new session for this one.
519+
EP_ASSERT (!ep_is_session_enabled ((EventPipeSessionID)session));
546520
if (thread) {
547-
// Wait for the thread to finish any writes to this session
548-
EP_YIELD_WHILE (ep_thread_get_session_write_in_progress (thread) == session->index);
521+
// The session is disabled, so wait for any in-progress writes to complete.
522+
EP_YIELD_WHILE (ep_thread_get_session_use_in_progress (thread) == session->index);
549523

550524
// Since we've already disabled the session, the thread won't call back in to this
551525
// session once its done with the current write
@@ -556,9 +530,7 @@ ep_session_suspend_write_event (EventPipeSession *session)
556530
dn_vector_ptr_dispose (&threads);
557531
}
558532

559-
if (session->buffer_manager)
560-
// Convert all buffers to read only to ensure they get flushed
561-
ep_buffer_manager_suspend_write_event (session->buffer_manager, session->index);
533+
ep_requires_lock_held ();
562534
}
563535

564536
void

src/native/eventpipe/ep-session.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ EP_DEFINE_GETTER(EventPipeSession *, session, uint32_t, index)
8686
EP_DEFINE_GETTER(EventPipeSession *, session, EventPipeSessionProviderList *, providers)
8787
EP_DEFINE_GETTER(EventPipeSession *, session, EventPipeBufferManager *, buffer_manager)
8888
EP_DEFINE_GETTER_REF(EventPipeSession *, session, volatile uint32_t *, rundown_enabled)
89+
EP_DEFINE_GETTER(EventPipeSession *, session, EventPipeSessionType, session_type)
8990
EP_DEFINE_GETTER(EventPipeSession *, session, uint64_t, rundown_keyword)
9091
EP_DEFINE_GETTER(EventPipeSession *, session, ep_timestamp_t, session_start_time)
9192
EP_DEFINE_GETTER(EventPipeSession *, session, ep_timestamp_t, session_start_timestamp)
@@ -114,6 +115,9 @@ ep_session_inc_ref (EventPipeSession *session);
114115
void
115116
ep_session_dec_ref (EventPipeSession *session);
116117

118+
void
119+
ep_session_close (EventPipeSession *session);
120+
117121
// _Requires_lock_held (ep)
118122
EventPipeSessionProvider *
119123
ep_session_get_session_provider (
@@ -130,11 +134,12 @@ ep_session_execute_rundown (
130134
EventPipeSession *session,
131135
dn_vector_ptr_t *execution_checkpoints);
132136

133-
// Force all in-progress writes to either finish or cancel
134-
// This is required to ensure we can safely flush and delete the buffers
137+
// Wait for all in-progress threads that depend on this session's pointer slot
138+
// in _ep_sessions remaining valid throughout its operation.
139+
// This is required to ensure we can safely flush, close, and free the session.
135140
// _Requires_lock_held (ep)
136141
void
137-
ep_session_suspend_write_event (EventPipeSession *session);
142+
ep_session_wait_for_inflight_thread_ops (EventPipeSession *session);
138143

139144
// Write a sequence point into the output stream synchronously.
140145
void

0 commit comments

Comments
 (0)