Commit 2d49beb
steal work that has already been claimed
QueuePerThreadPool normally claims an entire work queue at once instead of popping work items off one at a time, which reduces contention on the work queues. However, this scheme can cause starvation: when a work item takes a long time to complete and has more work items queued behind it, those items have already been claimed and sit in neither the waiting nor the deferred queues, so no other thread can reach them.

Added a steal_active function that prevents this starvation by stealing work that has already been claimed, reproducing the effect of threads popping off individual work items. steal_active is only called after the waiting and deferred queues have both been checked and found empty, and so should not be called frequently.
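In outline, a worker that finds no work in its own queues now falls back in this order (a condensed sketch of the worker_function hunk below; timestamps and the surrounding loop are elided):

/* this thread's waiting and deferred queues are empty; look at other threads */
if (steal_work(ctx, id, tw->steal_from, ctx->nthreads) == 0) {      /* their waiting/deferred queues */
    if (steal_work(ctx, id, 0, tw->steal_from) == 0) {
        /* last resort: take items another thread has already claimed */
        if (steal_active(ctx, id, tw->steal_from, ctx->nthreads) == 0) {
            steal_active(ctx, id, 0, tw->steal_from);
        }
    }
}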
1 parent: 183a767

5 files changed: 269 additions, 21 deletions

include/SinglyLinkedList.h

Lines changed: 2 additions & 1 deletion

@@ -83,7 +83,8 @@ struct SinglyLinkedList {
 typedef struct SinglyLinkedList sll_t;
 
 sll_t *sll_init(sll_t *sll);
-sll_t *sll_push(sll_t *sll, void *data);
+sll_t *sll_push(sll_t *sll, void *data); /* back */
+void *sll_pop(sll_t *sll); /* front */
 sll_t *sll_move_first(sll_t *dst, sll_t *src, const uint64_t n); /* move first n from src to dst, replacing dst */
 sll_t *sll_move(sll_t *dst, sll_t *src); /* move all of src to dst, replacing dst */
 sll_t *sll_move_append_first(sll_t *dst, sll_t *src, const uint64_t n); /* move first n from src to dst, appending to dst */
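For clarity, a minimal usage sketch of the FIFO pair declared above; the int payload is purely illustrative, and sll_destroy with a per-element destructor is the pattern used elsewhere in this commit:

sll_t q;
sll_init(&q);

int *item = malloc(sizeof(int));    /* the list stores caller-owned pointers */
*item = 42;
sll_push(&q, item);                 /* enqueue at the back */

int *front = (int *) sll_pop(&q);   /* dequeue from the front; NULL if the list is empty */
free(front);                        /* sll_pop frees the node, not the data */

sll_destroy(&q, free);              /* destroy remaining nodes, freeing each element */

Note that sll_pop frees the list node but returns the stored data pointer to the caller. This is why worker_function in this commit frees each queue_item as it pops it off the active queue, rather than batching all frees into a single sll_destroy(&being_processed, free) at the end.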

src/QueuePerThreadPool.c

Lines changed: 89 additions & 16 deletions

@@ -76,6 +76,9 @@ typedef enum {
 
 /* The context for a single thread in QPTPool */
 typedef struct QPTPoolThreadData {
+    sll_t active;                 /* work items that have already been claimed by this thread */
+    pthread_mutex_t active_mutex;
+
     sll_t waiting;                /* generally push into this queue */
     sll_t deferred;               /* push into here if waiting queue is too big; pop when waiting queue is empty */
     pthread_mutex_t mutex;

@@ -163,6 +166,49 @@ static uint64_t steal_work(QPTPool_t *ctx, const size_t id,
     return 0;
 }
 
+/*
+ * QueuePerThreadPool normally claims an entire work queue at once
+ * instead of popping them off one at a time. This reduces the
+ * contention on the work queues. However, this scheme makes it
+ * possible to cause starvation when a work item takes a long time to
+ * complete and has more work items queued up after it, all of which
+ * have already been claimed and are not in the waiting or deferred
+ * queues.
+ *
+ * This function attempts to prevent starvation by stealing work that
+ * has already been claimed, reproducing the effect of threads popping
+ * off individual work items. This function is only called after the
+ * waiting and deferred queues have been checked and found to be
+ * empty, and so should not be called frequently.
+ */
+static uint64_t steal_active(QPTPool_t *ctx, const size_t id,
+                             const size_t start, const size_t end) {
+    QPTPoolThreadData_t *tw = &ctx->data[id];
+
+    for(size_t i = start; i < end; i++) {
+        if (i == id) {
+            continue;
+        }
+
+        QPTPoolThreadData_t *target = &ctx->data[i];
+
+        if (pthread_mutex_trylock(&target->active_mutex) == 0) {
+            if (target->active.size) {
+                const uint64_t active = target->active.size * ctx->steal.num / ctx->steal.denom;
+                if (active) {
+                    sll_move_first(&tw->waiting, &target->active, active);
+                    pthread_mutex_unlock(&target->active_mutex);
+                    return active;
+                }
+            }
+
+            pthread_mutex_unlock(&target->active_mutex);
+        }
+    }
+
+    return 0;
+}
+
 static void *worker_function(void *args) {
     timestamp_create_start(wf);
 

@@ -176,10 +222,6 @@ static void *worker_function(void *args) {
     QPTPoolThreadData_t *tw = &ctx->data[id];
 
     while (1) {
-        timestamp_create_start(wf_sll_init);
-        sll_t being_processed; /* don't bother initializing */
-        timestamp_set_end(wf_sll_init);
-
         timestamp_create_start(wf_tw_mutex_lock);
         pthread_mutex_lock(&tw->mutex);
         timestamp_set_end(wf_tw_mutex_lock);

@@ -197,7 +239,19 @@ static void *worker_function(void *args) {
                !tw->waiting.size && !tw->deferred.size &&
                ctx->incomplete) {
             if (steal_work(ctx, id, tw->steal_from, ctx->nthreads) == 0) {
-                steal_work(ctx, id, 0, tw->steal_from);
+                if (steal_work(ctx, id, 0, tw->steal_from) == 0) {
+                    /*
+                     * if still can't find anything, try the active queue
+                     *
+                     * this should only be called if there is some
+                     * work that is taking so long that the rest of
+                     * the threads have run out of work, so this
+                     * should not happen too often
+                     */
+                    if (steal_active(ctx, id, tw->steal_from, ctx->nthreads) == 0) {
+                        steal_active(ctx, id, 0, tw->steal_from);
+                    }
+                }
             }
 
             tw->steal_from = (tw->steal_from + 1) % ctx->nthreads;

@@ -232,18 +286,20 @@ static void *worker_function(void *args) {
 
         /* move entire queue into work and clear out queue */
         timestamp_create_start(wf_move_queue);
+        pthread_mutex_lock(&tw->active_mutex);
         if (tw->waiting.size) {
-            sll_move(&being_processed, &tw->waiting);
+            sll_move(&tw->active, &tw->waiting);
         }
         else {
-            sll_move(&being_processed, &tw->deferred);
+            sll_move(&tw->active, &tw->deferred);
         }
+        pthread_mutex_unlock(&tw->active_mutex);
         timestamp_set_end(wf_move_queue);
 
 #if defined(DEBUG) && defined (QPTPOOL_QUEUE_SIZE)
         pthread_mutex_lock(&ctx->mutex);
         pthread_mutex_lock(&print_mutex);
-        tw->waiting.size = being_processed.size;
+        tw->waiting.size = tw->active.size;
 
         struct timespec now;
         clock_gettime(CLOCK_MONOTONIC, &now);

@@ -269,19 +325,35 @@ static void *worker_function(void *args) {
         /* process all work */
         size_t work_count = 0;
 
+        /*
+         * pop work item off before it is processed so that if another
+         * thread steals from the active queue, the current active
+         * work will not be re-run
+         *
+         * this has the side effect of moving 2 frees into the loop
+         * instead of batching all of them after processing the work
+         *
+         * tradeoffs:
+         *     more locking
+         *     delayed work
+         *     lower memory utilization
+         */
         timestamp_create_start(wf_get_queue_head);
-        sll_node_t *w = sll_head_node(&being_processed);
+        pthread_mutex_lock(&tw->active_mutex);
+        struct queue_item *qi = (struct queue_item *) sll_pop(&tw->active);
+        pthread_mutex_unlock(&tw->active_mutex);
         timestamp_end_print(ctx->debug_buffers, id, "wf_get_queue_head", wf_get_queue_head);
 
-        while (w) {
+        while (qi) {
             timestamp_create_start(wf_process_work);
-            struct queue_item *qi = sll_node_data(w);
-
             tw->threads_successful += !qi->func(ctx, id, qi->work, ctx->args);
+            free(qi);
             timestamp_end_print(ctx->debug_buffers, id, "wf_process_work", wf_process_work);
 
             timestamp_create_start(wf_next_work);
-            w = sll_next_node(w);
+            pthread_mutex_lock(&tw->active_mutex);
+            qi = (struct queue_item *) sll_pop(&tw->active);
+            pthread_mutex_unlock(&tw->active_mutex);
             timestamp_end_print(ctx->debug_buffers, id, "wf_next_work", wf_next_work);
 
             work_count++;

@@ -290,16 +362,13 @@ static void *worker_function(void *args) {
         timestamp_set_end(wf_process_queue);
 
         timestamp_create_start(wf_cleanup);
-        sll_destroy(&being_processed, free);
         tw->threads_started += work_count;
-
         pthread_mutex_lock(&ctx->mutex);
         ctx->incomplete -= work_count;
         pthread_mutex_unlock(&ctx->mutex);
         timestamp_set_end(wf_cleanup);
 
 #if defined(DEBUG) && defined(PER_THREAD_STATS)
-        timestamp_print(ctx->debug_buffers, id, "wf_sll_init", wf_sll_init);
         timestamp_print(ctx->debug_buffers, id, "wf_tw_mutex_lock", wf_tw_mutex_lock);
         timestamp_print(ctx->debug_buffers, id, "wf_ctx_mutex_lock", wf_ctx_mutex_lock);
         timestamp_print(ctx->debug_buffers, id, "wf_wait", wf_wait);

@@ -366,6 +435,8 @@ QPTPool_t *QPTPool_init(const size_t nthreads, void *args) {
     /* set up thread data, but not threads */
     for(size_t i = 0; i < nthreads; i++) {
         QPTPoolThreadData_t *data = &ctx->data[i];
+        sll_init(&data->active);
+        pthread_mutex_init(&data->active_mutex, NULL);
         sll_init(&data->waiting);
         sll_init(&data->deferred);
         pthread_mutex_init(&data->mutex, NULL);

@@ -702,6 +773,8 @@ void QPTPool_destroy(QPTPool_t *ctx) {
          */
         sll_destroy(&data->deferred, free);
        sll_destroy(&data->waiting, free);
+        pthread_mutex_destroy(&data->active_mutex);
+        sll_destroy(&data->active, free);
     }
 
     pthread_mutex_destroy(&ctx->mutex);
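Taken together with the unit tests below, the new path can be exercised with something like this minimal sketch (do_work is a placeholder callback; the API calls and the 1/2 steal ratio are taken from the tests in this commit):

static int do_work(QPTPool_t *ctx, const size_t id, void *data, void *args) {
    (void) ctx; (void) id; (void) data; (void) args;
    return 0;                                   /* 0 counts as a successful work item */
}

QPTPool_t *pool = QPTPool_init(2, NULL);        /* 2 threads; NULL is handed to callbacks as args */
QPTPool_set_steal(pool, 1, 2);                  /* allow stealing up to 1/2 of a victim's queue */
QPTPool_enqueue_here(pool, 0, QPTPool_enqueue_WAIT, do_work, NULL);
QPTPool_start(pool);
QPTPool_wait(pool);                             /* block until all enqueued work completes */
QPTPool_destroy(pool);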

src/SinglyLinkedList.c

Lines changed: 22 additions & 0 deletions

@@ -103,6 +103,28 @@ sll_t *sll_push(sll_t *sll, void *data) {
     return sll;
 }
 
+void *sll_pop(sll_t *sll) {
+    /* Not checking arguments */
+
+    if (sll->size == 0) {
+        return NULL;
+    }
+
+    sll_node_t *head = sll->head;
+    void *data = head->data;
+    sll->head = head->next;
+
+    if (sll->tail == head) {
+        sll->tail = NULL;
+    }
+
+    sll->size--;
+
+    free(head);
+
+    return data;
+}
+
 sll_t *sll_move_first(sll_t *dst, sll_t *src, const uint64_t n) {
     /* Not checking arguments */

test/unit/googletest/QueuePerThreadPool.cpp

Lines changed: 110 additions & 4 deletions

@@ -424,7 +424,7 @@ static void test_steal(const QPTPool_enqueue_dst queue, const bool find) {
     }, &counter), QPTPool_enqueue_WAIT);
 
     // need to make sure only work item got popped off
-    while (!counter) {
+    while (counter < 1) {
         sched_yield();
     }

@@ -501,7 +501,7 @@ TEST(QueuePerThreadPool, not_enough_deferred) {
     }, &counter), QPTPool_enqueue_WAIT);
 
     // need to make sure this work item got popped off
-    while (!counter) {
+    while (counter < 1) {
         sched_yield();
     }

@@ -515,7 +515,7 @@ TEST(QueuePerThreadPool, not_enough_deferred) {
               QPTPool_enqueue_WAIT);
 
     // wait until thread 1 runs
-    while (counter < 1) {
+    while (counter < 2) {
         sched_yield();
     }

@@ -524,7 +524,112 @@ TEST(QueuePerThreadPool, not_enough_deferred) {
               QPTPool_enqueue_WAIT);
 
     // wait until thread 1 is complete
-    while (counter < 2) {
+    while (counter < 3) {
         sched_yield();
     }
 
+    // let thread 0 finish
+    pthread_mutex_unlock(&mutex);
+
+    QPTPool_wait(pool);
+
+    const uint64_t expected = 4;
+    EXPECT_EQ(counter, expected);
+    EXPECT_EQ(QPTPool_threads_started(pool), expected);
+    EXPECT_EQ(QPTPool_threads_completed(pool), expected);
+
+    QPTPool_destroy(pool);
+}
+
+TEST(QueuePerThreadPool, steal_active) {
+    std::size_t counter = 0;
+
+    // macOS doesn't seem to like std::mutex
+    pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+    QPTPool_t *pool = QPTPool_init(2, &mutex);
+    ASSERT_NE(pool, nullptr);
+    ASSERT_EQ(QPTPool_set_steal(pool, 1, 2), 0);
+
+    // prevent thread 0 from completing
+    pthread_mutex_lock(&mutex);
+
+    EXPECT_EQ(QPTPool_enqueue_here(pool, 0, QPTPool_enqueue_WAIT,
+                                   [](QPTPool_t *ctx, const std::size_t id, void *data, void *args) -> int {
+                                       pthread_mutex_t *mutex = static_cast<pthread_mutex_t *>(args);
+
+                                       increment_counter(ctx, id, data, args);
+
+                                       pthread_mutex_lock(mutex);
+                                       pthread_mutex_unlock(mutex);
+                                       return 0;
+                                   }, &counter), QPTPool_enqueue_WAIT);
+
+    struct Data {
+        std::size_t *counter;
+        pthread_mutex_t mutex;
+        std::size_t value;
+
+        Data(std::size_t *counter)
+            : counter(counter),
+              mutex(PTHREAD_MUTEX_INITIALIZER),
+              value(0)
+        {}
+    };
+
+    struct Data data(&counter);
+
+    const std::size_t EXPECTED_VALUE = 1234;
+    const std::size_t BAD_VALUE = 5678;
+
+    // thread 0: queue up work that will be claimed by thread 0, but will never run in thread 0
+    EXPECT_EQ(QPTPool_enqueue_here(pool, 0, QPTPool_enqueue_WAIT,
+                                   [](QPTPool_t *ctx, const std::size_t id, void *data, void *args) -> int {
+                                       struct Data *d = static_cast<struct Data *>(data);
+
+                                       increment_counter(ctx, id, d->counter, args);
+
+                                       pthread_mutex_lock(&d->mutex);
+                                       if (d->value == 0) {
+                                           d->value = EXPECTED_VALUE;
+                                       }
+                                       pthread_mutex_unlock(&d->mutex);
+
+                                       return 0;
+                                   }, &data), QPTPool_enqueue_WAIT);
+
+    // thread 0: need second work item to be queued for steal fraction of 1/2 to steal 1
+    EXPECT_EQ(QPTPool_enqueue_here(pool, 0, QPTPool_enqueue_WAIT,
+                                   [](QPTPool_t *ctx, const std::size_t id, void *data, void *args) -> int {
+                                       struct Data *d = static_cast<struct Data *>(data);
+
+                                       increment_counter(ctx, id, d->counter, args);
+
+                                       pthread_mutex_lock(&d->mutex);
+                                       if (d->value == 0) {
+                                           d->value = BAD_VALUE;
+                                       }
+                                       pthread_mutex_unlock(&d->mutex);
+
+                                       return 0;
+                                   }, &data), QPTPool_enqueue_WAIT);
+
+    // start the thread pool, causing thread 0 to take all work items, but get stuck on the
+    // first work item, not letting the rest of the work items run
+    ASSERT_EQ(QPTPool_start(pool), 0);
+
+    // need to make sure the first work item started
+    while (counter < 1) {
+        sched_yield();
+    }
+
+    // push to thread 1, so after it completes, it steals the first item from thread 0's active queue
+    EXPECT_EQ(QPTPool_enqueue_here(pool, 1, QPTPool_enqueue_WAIT, increment_counter, &counter),
+              QPTPool_enqueue_WAIT);
+
+    // wait for thread 1's original work item and the stolen work item to finish
+    while (counter < 3) {
         sched_yield();
     }

@@ -535,6 +640,7 @@ TEST(QueuePerThreadPool, not_enough_deferred) {
 
     const uint64_t expected = 4;
     EXPECT_EQ(counter, expected);
+    EXPECT_EQ(data.value, EXPECTED_VALUE);
     EXPECT_EQ(QPTPool_threads_started(pool), expected);
     EXPECT_EQ(QPTPool_threads_completed(pool), expected);
