@@ -118,7 +118,7 @@ void import_pushes(wstream_df_thread_p cthread)
118
118
}
119
119
}
120
120
121
- int work_push_beneficial_max_writer (wstream_df_frame_p fp , wstream_df_thread_p cthread , int num_workers , int * target_worker )
121
+ int work_push_beneficial_max_writer (wstream_df_frame_p fp , wstream_df_thread_p cthread , int num_workers , unsigned * target_worker )
122
122
{
123
123
unsigned int max_worker ;
124
124
int max_data ;
@@ -223,7 +223,7 @@ int work_push_beneficial_split_owner_chain(wstream_df_frame_p fp, wstream_df_thr
223
223
unsigned int max_worker ;
224
224
int numa_node_id ;
225
225
int max_data ;
226
- size_t data [MAX_NUMA_NODES ];
226
+ size_t data [num_numa_nodes ];
227
227
wstream_df_numa_node_p numa_node ;
228
228
unsigned int rand_idx ;
229
229
@@ -247,7 +247,7 @@ int work_push_beneficial_split_owner_chain(wstream_df_frame_p fp, wstream_df_thr
247
247
max_data = data [cthread -> numa_node -> id ];
248
248
numa_node_id = cthread -> numa_node -> id ;
249
249
250
- for (int i = 0 ; i < MAX_NUMA_NODES ; i ++ ) {
250
+ for (unsigned i = 0 ; i < num_numa_nodes ; i ++ ) {
251
251
if ((int )data [i ] > max_data ) {
252
252
max_data = data [i ];
253
253
numa_node_id = i ;
@@ -272,13 +272,13 @@ int work_push_beneficial_split_owner_chain_inner_mw(wstream_df_frame_p fp, wstre
272
272
unsigned int max_worker ;
273
273
int numa_node_id ;
274
274
int max_data ;
275
- size_t data [MAX_NUMA_NODES ];
275
+ size_t data [num_numa_nodes ];
276
276
wstream_df_numa_node_p numa_node ;
277
277
unsigned int rand_idx ;
278
278
int node_id ;
279
279
280
280
#if defined(PUSH_EQUAL_RANDOM )
281
- size_t others [MAX_NUMA_NODES ];
281
+ size_t others [num_numa_nodes ];
282
282
int num_others = 0 ;
283
283
#endif
284
284
@@ -308,14 +308,14 @@ int work_push_beneficial_split_owner_chain_inner_mw(wstream_df_frame_p fp, wstre
308
308
numa_node_id = cthread -> numa_node -> id ;
309
309
310
310
#if defined(PUSH_EQUAL_SEQ )
311
- for (int i = 0 ; i < MAX_NUMA_NODES ; i ++ ) {
311
+ for (unsigned i = 0 ; i < num_numa_nodes ; i ++ ) {
312
312
if ((int )data [i ] > max_data ) {
313
313
max_data = data [i ];
314
314
numa_node_id = i ;
315
315
}
316
316
}
317
317
#elif defined(PUSH_EQUAL_RANDOM )
318
- for (int i = 0 ; i < MAX_NUMA_NODES ; i ++ ) {
318
+ for (unsigned i = 0 ; i < num_numa_nodes ; i ++ ) {
319
319
if ((int )data [i ] > max_data )
320
320
others [num_others ++ ] = i ;
321
321
@@ -368,9 +368,8 @@ int work_push_beneficial_split_score_nodes(wstream_df_frame_p fp, wstream_df_thr
368
368
{
369
369
unsigned int max_worker ;
370
370
int numa_node_id ;
371
- int max_data ;
372
- size_t data [MAX_NUMA_NODES ];
373
- size_t scores [MAX_NUMA_NODES ];
371
+ size_t data [num_numa_nodes ];
372
+ size_t scores [num_numa_nodes ];
374
373
size_t min_score ;
375
374
wstream_df_numa_node_p numa_node ;
376
375
int factor ;
@@ -379,7 +378,7 @@ int work_push_beneficial_split_score_nodes(wstream_df_frame_p fp, wstream_df_thr
379
378
int input_size = 0 ;
380
379
381
380
#if defined(PUSH_EQUAL_RANDOM )
382
- size_t others [MAX_NUMA_NODES ];
381
+ size_t others [num_numa_nodes ];
383
382
int num_others = 0 ;
384
383
#endif
385
384
@@ -390,8 +389,10 @@ int work_push_beneficial_split_score_nodes(wstream_df_frame_p fp, wstream_df_thr
390
389
/* By default assume that data is going to be reused */
391
390
if (vi -> reuse_data_view )
392
391
node_id = slab_numa_node_of (vi -> reuse_data_view -> data );
392
+ #if USE_BROADCAST_TABLES
393
393
else if (vi -> broadcast_table ) /* Peek view with deferred copy */
394
394
node_id = -1 ;
395
+ #endif // USE_BROADCAST_TABLES
395
396
else
396
397
node_id = slab_numa_node_of (vi -> data );
397
398
@@ -407,22 +408,22 @@ int work_push_beneficial_split_score_nodes(wstream_df_frame_p fp, wstream_df_thr
407
408
if (input_size < PUSH_MIN_FRAME_SIZE )
408
409
return 0 ;
409
410
410
- for (int target_node = 0 ; target_node < MAX_NUMA_NODES ; target_node ++ )
411
- for (int source_node = 0 ; source_node < MAX_NUMA_NODES ; source_node ++ )
412
- scores [target_node ] += data [source_node ] * mem_transfer_costs (target_node , source_node );
411
+ for (unsigned target_node = 0 ; target_node < num_numa_nodes ; target_node ++ )
412
+ for (unsigned source_node = 0 ; source_node < num_numa_nodes ; source_node ++ )
413
+ scores [target_node ] += data [source_node ] * hwloc_mem_transfer_cost (target_node , source_node );
413
414
414
415
min_score = scores [cthread -> numa_node -> id ];
415
416
numa_node_id = cthread -> numa_node -> id ;
416
417
417
418
#if defined(PUSH_EQUAL_SEQ )
418
- for (int i = 0 ; i < MAX_NUMA_NODES ; i ++ ) {
419
+ for (unsigned i = 0 ; i < num_numa_nodes ; i ++ ) {
419
420
if (scores [i ] < min_score ) {
420
421
min_score = scores [i ];
421
422
numa_node_id = i ;
422
423
}
423
424
}
424
425
#elif defined(PUSH_EQUAL_RANDOM )
425
- for (int i = 0 ; i < MAX_NUMA_NODES ; i ++ ) {
426
+ for (int i = 0 ; i < num_numa_nodes ; i ++ ) {
426
427
if (scores [i ] == min_score )
427
428
others [num_others ++ ] = i ;
428
429
@@ -466,7 +467,7 @@ int work_push_beneficial_split_score_nodes(wstream_df_frame_p fp, wstream_df_thr
466
467
* of the worker suited best for execution in target_worker. Otherwise 0 is
467
468
* returned.
468
469
*/
469
- int work_push_beneficial (wstream_df_frame_p fp , wstream_df_thread_p cthread , int num_workers , int * target_worker )
470
+ int work_push_beneficial (wstream_df_frame_p fp , wstream_df_thread_p cthread , int num_workers , wstream_df_thread_p * wstream_df_worker_threads , int * target_worker )
470
471
{
471
472
int res ;
472
473
unsigned int lcl_target_worker ;
@@ -496,7 +497,7 @@ int work_push_beneficial(wstream_df_frame_p fp, wstream_df_thread_p cthread, int
496
497
if (/* Only migrate to a different worker */
497
498
lcl_target_worker != cthread -> worker_id &&
498
499
/* Do not migrate to workers that are too close in the memory hierarchy */
499
- mem_lowest_common_level (cthread -> worker_id , worker_id_to_cpu ( lcl_target_worker ) ) >= PUSH_MIN_MEM_LEVEL )
500
+ level_of_common_ancestor (cthread -> cpu , wstream_df_worker_threads [ lcl_target_worker ] -> cpu ) >= PUSH_MIN_MEM_LEVEL )
500
501
{
501
502
* target_worker = lcl_target_worker ;
502
503
return 1 ;
@@ -517,7 +518,6 @@ int work_try_push(wstream_df_frame_p fp,
517
518
{
518
519
int level ;
519
520
int curr_owner ;
520
- int fp_size ;
521
521
522
522
/* Save current owner for statistics and update new owner */
523
523
curr_owner = fp -> last_owner ;
@@ -526,11 +526,14 @@ int work_try_push(wstream_df_frame_p fp,
526
526
/* We need to copy frame attributes used afterwards as the frame will
527
527
* be under control of the target worker once it is pushed.
528
528
*/
529
- fp_size = fp -> size ;
529
+
530
+ #if ALLOW_WQEVENT_SAMPLING
531
+ int fp_size = fp -> size ;
532
+ #endif // ALLOW_WQEVENT_SAMPLING
530
533
531
534
if (fifo_pushback (& wstream_df_worker_threads [target_worker ]-> push_fifo , fp )) {
532
535
/* Push was successful, update traces and statistics */
533
- level = mem_lowest_common_level (cthread -> worker_id , worker_id_to_cpu ( target_worker ) );
536
+ level = level_of_common_ancestor (cthread -> cpu , wstream_df_worker_threads [ target_worker ] -> cpu );
534
537
inc_wqueue_counter (& cthread -> pushes_mem [level ], 1 );
535
538
536
539
trace_push (cthread , target_worker , worker_id_to_cpu (target_worker ), fp_size , fp );
0 commit comments