Skip to content

Commit ca5cc93

Browse files
committed
OpenStream Options: option on/off consistency
Turn an option off by setting its value to zero, and on by setting it to any non-zero value. Set sound default options.
1 parent 5623b87 commit ca5cc93

File tree

9 files changed

+50
-52
lines changed

9 files changed

+50
-52
lines changed

libworkstream_df/alloc.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ static inline int slab_force_advise_pages(void* addr, size_t size, int advice)
4747
round_page_size(size),
4848
advice))
4949
{
50-
#ifdef SLAB_ALLOCATOR_VERBOSE
50+
#if SLAB_ALLOCATOR_VERBOSE
5151
fprintf(stderr, "Could not disable use of huge pages\n");
5252
perror("madvise");
5353
#endif // SLAB_ALLOCATOR_VERBOSE
@@ -72,7 +72,7 @@ static inline int slab_get_numa_node(void* address, unsigned int size)
7272
hwloc_bitmap_t numa_nodes = numa_memlocation_of_memory(address, size);
7373
// The memory could be allocated on more than one node, return one of them
7474
int max_node = hwloc_bitmap_first(numa_nodes);
75-
#ifdef SLAB_ALLOCATOR_VERBOSE
75+
#if SLAB_ALLOCATOR_VERBOSE
7676
if(max_node < 0)
7777
fprintf(stderr, "Could not determine node of %p\n", address);
7878
#endif // SLAB_ALLOCATOR_VERBOSE
@@ -229,7 +229,7 @@ static inline int slab_alloc_memalign(slab_cache_p slab_cache, void** ptr, size_
229229
pthread_spin_lock(&slab_cache->free_mem_lock);
230230

231231
if(slab_cache->free_mem_bytes < alloc_size) {
232-
#ifdef SLAB_ALLOCATOR_VERBOSE
232+
#if SLAB_ALLOCATOR_VERBOSE
233233
if(slab_cache->free_mem_bytes)
234234
printf("wasted %zu bytes\n", slab_cache->free_mem_bytes);
235235
#endif // SLAB_ALLOCATOR_VERBOSE
@@ -328,7 +328,7 @@ slab_warmup (slab_cache_p slab_cache, unsigned int idx, unsigned int num_slabs,
328328
assert(!posix_memalign_success);
329329

330330
if (bind_memory_to_numa_node(alloc, alloc_size, node)) {
331-
#ifdef SLAB_ALLOCATOR_VERBOSE
331+
#if SLAB_ALLOCATOR_VERBOSE
332332
fprintf(stderr, "Could not slab memory to numa node %u\n", node);
333333
#endif // SLAB_ALLOCATOR_VERBOSE
334334
}

libworkstream_df/config.h

+10-12
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@
1919
* When a worker did not successfully steal some work from any other workers,
2020
* the worker will relinquish the CPU and the thread is placed at the end of
2121
* the scheduler queue.
22-
* Activating this option will activate active polling for new task without stealing the
22+
* With this option active, workers will actively poll for new task without
23+
* relinquishing the CPU.
2324
*/
2425

25-
// #define WS_NO_YIELD_SPIN
26+
#define WS_NO_YIELD_SPIN 0
2627

2728
/*
2829
* Disable workers local cache. A Task placed into this local cache cannot be
@@ -67,15 +68,15 @@
6768
* This may have a performance impact.
6869
*/
6970

70-
// #define SLAB_ALLOCATOR_VERBOSE
71+
#define SLAB_ALLOCATOR_VERBOSE 0
7172

7273
/*
7374
* Make HWLOC print information about the hardware and worker placement. This
7475
* has no performance impact on the program other than the initialisation in
7576
* the pre_main function.
7677
*/
7778

78-
// #define HWLOC_VERBOSE
79+
#define HWLOC_VERBOSE 0
7980

8081
/*********************** OpenStream Profiling Options ***********************/
8182

@@ -92,7 +93,8 @@
9293
* REQUIRES WQUEUE_PROFILE
9394
*/
9495

95-
// #define MATRIX_PROFILE "wqueue_matrix.out"
96+
#define MATRIX_PROFILE 0
97+
#define MATRIX_PROFILE_OUTPUT "wqueue_matrix.out"
9698

9799
/*
98100
* Use linux getrusage function to gather resource usage for running threads.
@@ -104,7 +106,7 @@
104106
* - The number of involuntary context switches (e.g. kernel scheduler intervention)
105107
*/
106108

107-
// #define PROFILE_RUSAGE
109+
#define PROFILE_RUSAGE 0
108110

109111
/*********************** OpenStream Probably Broken Options ***********************/
110112

@@ -154,12 +156,8 @@
154156
/*
155157
* Some configuration checks
156158
*/
157-
#if defined(UNIFORM_MEMORY_ACCESS) && MAX_NUMA_NODES != 1
158-
#error "UNIFORM_MEMORY_ACCESS defined, but MAX_NUMA_NODES != 1"
159-
#endif
160-
161-
#if defined(MATRIX_PROFILE) && !WQUEUE_PROFILE
159+
#if MATRIX_PROFILE && !WQUEUE_PROFILE
162160
#error "MATRIX_PROFILE defined, but WQUEUE_PROFILE != 1"
163-
#endif // defined(MATRIX_PROFILE) && !defined(WQUEUE_PROFILE)
161+
#endif
164162

165163
#endif

libworkstream_df/hwloc-support.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ bool discover_machine_topology(void) {
6969
malloc(nproc * sizeof(*cpuid_to_closest_numa_node));
7070
populate_closest_numa_nodes();
7171

72-
#ifdef HWLOC_VERBOSE
72+
#if HWLOC_VERBOSE
7373
fprintf(stdout,
7474
"[HWLOC Info] The machine has a depth of %d\n"
7575
"[HWLOC Info] The machine has %d numa node(s)\n"
@@ -125,7 +125,7 @@ bool distribute_worker_on_topology(unsigned num_workers,
125125
hwloc_bitmap_or(restricted_set, restricted_set, distrib_sets[i]);
126126
(*processing_units)[i] = hwloc_get_next_obj_inside_cpuset_by_type(
127127
machine_topology, distrib_sets[i], HWLOC_OBJ_PU, NULL);
128-
#ifdef HWLOC_VERBOSE
128+
#if HWLOC_VERBOSE
129129
fprintf(stderr, "Worker %u mapped to processing unit %u (OS index %u)\n", i,
130130
(*processing_units)[i]->logical_index,
131131
(*processing_units)[i]->os_index);

libworkstream_df/numa.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ static inline void numa_node_init(wstream_df_numa_node_p node, int node_id) {
1414
if (bind_memory_to_numa_node(node->workers,
1515
wstream_num_workers * sizeof(*node->workers),
1616
node_id)) {
17-
#ifdef HWLOC_VERBOSE
17+
#if HWLOC_VERBOSE
1818
fprintf(stderr, "Could not bind memory to numa node %u\n", node_id);
1919
#endif // HWLOC_VERBOSE
2020
}
@@ -30,7 +30,7 @@ int numa_nodes_init(void)
3030
wstream_df_fatal("Cannot allocate numa node structure");
3131

3232
if (bind_memory_to_numa_node(ptr, size, i)) {
33-
#ifdef HWLOC_VERBOSE
33+
#if HWLOC_VERBOSE
3434
fprintf(stderr, "Could not bind memory to numa node %u\n", i);
3535
#endif // HWLOC_VERBOSE
3636
}

libworkstream_df/profiling.c

+12-12
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
#include "numa.h"
77
#include <pthread.h>
88

9-
#ifdef PROFILE_RUSAGE
9+
#if PROFILE_RUSAGE
1010
#include <sys/time.h>
1111
#include <sys/resource.h>
12-
#endif // defined(PROFILE_RUSAGE)
12+
#endif // PROFILE_RUSAGE
1313

14-
#ifdef MATRIX_PROFILE
14+
#if MATRIX_PROFILE
1515
#include <assert.h>
1616

1717
void *tm_data__;
@@ -25,7 +25,7 @@ void init_transfer_matrix(void) {
2525
void dump_transfer_matrix(unsigned int num_workers)
2626
{
2727
unsigned int i, j;
28-
FILE* matrix_fp = fopen(MATRIX_PROFILE, "w+");
28+
FILE* matrix_fp = fopen(MATRIX_PROFILE_OUTPUT, "w+");
2929
assert(matrix_fp);
3030

3131
for (i = 0; i < num_workers; ++i) {
@@ -247,7 +247,7 @@ wqueue_counters_enter_runtime(struct wstream_df_thread* th)
247247
{
248248
}
249249

250-
#ifdef PROFILE_RUSAGE
250+
#if PROFILE_RUSAGE
251251

252252
void wqueue_counters_profile_rusage(struct wstream_df_thread *th) {
253253
struct rusage usage;
@@ -263,15 +263,15 @@ void wqueue_counters_profile_rusage(struct wstream_df_thread *th) {
263263
th->inv_context_switches = usage.ru_nivcsw;
264264
}
265265

266-
#endif // defined(PROFILE_RUSAGE)
266+
#endif // PROFILE_RUSAGE
267267

268268
void init_wqueue_counters(wstream_df_thread_p th) {
269269
th->steals_owncached = 0;
270270
th->steals_ownqueue = 0;
271271
th->steals_mem = calloc(topology_depth, sizeof(*th->steals_mem));
272272
if (bind_memory_to_cpu_memspace(
273273
th->steals_mem, topology_depth * sizeof(*th->steals_mem), th->cpu)) {
274-
#ifdef HWLOC_VERBOSE
274+
#if HWLOC_VERBOSE
275275
perror("hwloc_membind");
276276
#endif // HWLOC_VERBOSE
277277
}
@@ -284,7 +284,7 @@ void init_wqueue_counters(wstream_df_thread_p th) {
284284
th->bytes_mem = calloc(topology_depth, sizeof(*th->bytes_mem));
285285
if (bind_memory_to_cpu_memspace(
286286
th->bytes_mem, topology_depth * sizeof(*th->bytes_mem), th->cpu)) {
287-
#ifdef HWLOC_VERBOSE
287+
#if HWLOC_VERBOSE
288288
perror("hwloc_membind");
289289
#endif // HWLOC_VERBOSE
290290
}
@@ -297,13 +297,13 @@ void init_wqueue_counters(wstream_df_thread_p th) {
297297

298298
th->reuse_addr = 0;
299299
th->reuse_copy = 0;
300-
#ifdef PROFILE_RUSAGE
300+
#if PROFILE_RUSAGE
301301
th->system_time_us = 0;
302302
th->major_page_faults = 0;
303303
th->minor_page_faults = 0;
304304
th->max_resident_size = 0;
305305
th->inv_context_switches = 0;
306-
#endif // defined(PROFILE_RUSAGE)
306+
#endif // PROFILE_RUSAGE
307307

308308
init_papi(th);
309309
}
@@ -316,7 +316,7 @@ dump_wqueue_counters_single (wstream_df_thread_p th)
316316
int i;
317317
const char* events[] = WS_PAPI_EVENTS;
318318
#endif // defined(WS_PAPI_PROFILE)
319-
#ifdef PROFILE_RUSAGE
319+
#if PROFILE_RUSAGE
320320
printf ("Thread %d: system_time_us = %lld\n",
321321
th->worker_id,
322322
th->system_time_us);
@@ -332,7 +332,7 @@ dump_wqueue_counters_single (wstream_df_thread_p th)
332332
printf ("Thread %d: inv_context_switches = %lld\n",
333333
th->worker_id,
334334
th->inv_context_switches);
335-
#endif // defined(PROFILE_RUSAGE)
335+
#endif // PROFILE_RUSAGE
336336
printf ("Thread %d: tasks_created = %lld\n",
337337
th->worker_id,
338338
th->tasks_created);

libworkstream_df/profiling.h

+8-8
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,16 @@ init_papi(struct wstream_df_thread* th);
4949
#define update_papi_timestamp(th, ts) do { } while(0)
5050
#endif
5151

52-
#ifdef PROFILE_RUSAGE
52+
#if PROFILE_RUSAGE
5353
#define PROFILE_RUSAGE_FIELDS \
5454
unsigned long long system_time_us; \
5555
unsigned long long major_page_faults; \
5656
unsigned long long minor_page_faults; \
5757
unsigned long long max_resident_size; \
5858
unsigned long long inv_context_switches;
59-
#else // !defined(PROFILE_RUSAGE)
59+
#else // !PROFILE_RUSAGE
6060
#define PROFILE_RUSAGE_FIELDS
61-
#endif // defined(PROFILE_RUSAGE)
61+
#endif // PROFILE_RUSAGE
6262

6363
#if WQUEUE_PROFILE
6464
#define WSTREAM_DF_THREAD_WQUEUE_PROFILE_BASIC_FIELDS \
@@ -85,11 +85,11 @@ stop_wqueue_counters (void);
8585
void
8686
wqueue_counters_enter_runtime(struct wstream_df_thread* th);
8787

88-
#ifdef PROFILE_RUSAGE
88+
#if PROFILE_RUSAGE
8989
void wqueue_counters_profile_rusage(struct wstream_df_thread *th);
90-
#else // !defined(PROFILE_RUSAGE)
90+
#else // !PROFILE_RUSAGE
9191
#define wqueue_counters_profile_rusage(th) do {} while(0)
92-
#endif // defined(PROFILE_RUSAGE)
92+
#endif // PROFILE_RUSAGE
9393

9494
void
9595
dump_wqueue_counters (unsigned int num_workers, struct wstream_df_thread** wstream_df_worker_threads);
@@ -127,7 +127,7 @@ dump_global_wqueue_counters ();
127127
#define set_wqueue_counter_if_zero(ctr, val) do {} while(0)
128128
#endif
129129

130-
#ifdef MATRIX_PROFILE
130+
#if MATRIX_PROFILE
131131

132132
extern void *tm_data__;
133133
#define transfer_matrix ((unsigned long long(*)[wstream_num_workers])tm_data__)
@@ -141,7 +141,7 @@ inline void inc_transfer_matrix_entry(unsigned int consumer,
141141
void init_transfer_matrix(void);
142142
void dump_transfer_matrix(unsigned int num_workers);
143143

144-
#else // !defined(MATRIX_PROFILE)
144+
#else // !MATRIX_PROFILE
145145

146146
#define inc_transfer_matrix_entry(consumer, producer, num_bytes) do {} while(0)
147147
#define init_transfer_matrix() do {} while(0)

libworkstream_df/reuse.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ void match_reuse_input_clause_with_output_clause(wstream_df_view_p out_view, wst
7272
reuse_view_sanity_check(out_view, in_view);
7373
in_view->reuse_data_view = NULL;
7474

75-
#ifdef DEFERRED_ALLOC
75+
#if DEFERRED_ALLOC
7676
out_view->consumer_view = in_view;
7777
#else
7878
wstream_df_thread_p cthread = current_thread;
@@ -94,7 +94,7 @@ void match_reuse_output_clause_with_input_clause(wstream_df_view_p out_view, wst
9494
assert(!is_reuse_view(in_view));
9595
assert(in_view->refcount == 1);
9696

97-
#ifdef DEFERRED_ALLOC
97+
#if DEFERRED_ALLOC
9898
if(!in_view->data) {
9999
match_reuse_output_clause_with_reuse_input_clause(out_view, in_view);
100100
return;
@@ -244,7 +244,7 @@ void __built_in_wstream_df_prepare_data_vec(size_t n, void* v)
244244

245245
void __built_in_wstream_df_reuse_update_data(void* v)
246246
{
247-
#ifdef DEFERRED_ALLOC
247+
#if DEFERRED_ALLOC
248248
const int deferred_alloc_enabled = 1;
249249
#else
250250
const int deferred_alloc_enabled = 0;

libworkstream_df/trace.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,13 @@ void trace_runtime_counters(struct wstream_df_thread* cthread)
7070
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_SLAB_REFILLS, cthread->slab_cache->slab_refills);
7171
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_REUSE_ADDR, cthread->reuse_addr);
7272
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_REUSE_COPY, cthread->reuse_copy);
73-
#ifdef PROFILE_RUSAGE
73+
#if PROFILE_RUSAGE
7474
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_SYSTEM_TIME_US, cthread->system_time_us);
7575
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_MAJOR_PAGE_FAULTS, cthread->major_page_faults);
7676
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_MINOR_PAGE_FAULTS, cthread->minor_page_faults);
7777
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_MAX_RESIDENT_SIZE, cthread->max_resident_size);
7878
trace_counter(cthread, RUNTIME_COUNTER_BASE+RUNTIME_COUNTER_INV_CONTEXT_SWITCHES, cthread->inv_context_switches);
79-
#endif // defined(PROFILE_RUSAGE)
79+
#endif // PROFILE_RUSAGE
8080

8181
uint64_t steals = 0;
8282
for(int level = 0; level < MEM_NUM_LEVELS; level++)

libworkstream_df/wstream_df.c

+7-7
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ void __built_in_wstream_df_alloc_view_data(void* v, size_t size)
558558

559559
void __built_in_wstream_df_alloc_view_data_deferred(void* v, size_t size)
560560
{
561-
#ifdef DEFERRED_ALLOC
561+
#if DEFERRED_ALLOC
562562
wstream_df_view_p view = v;
563563
view->data = NULL;
564564
#else
@@ -761,7 +761,7 @@ wstream_df_resolve_dependences (void *v, void *s, bool is_read_view_p)
761761
match_reuse_output_clause_with_input_clause(prod_view, view);
762762
}
763763

764-
#ifdef DEFERRED_ALLOC
764+
#if DEFERRED_ALLOC
765765
/* Data of the consumer view has not been allocated
766766
* yet. If we are the only producer, we further defer
767767
* allocation until the producer gets ready. Otherwise
@@ -828,7 +828,7 @@ wstream_df_resolve_dependences (void *v, void *s, bool is_read_view_p)
828828
match_reuse_output_clause_with_input_clause(view, cons_view);
829829
}
830830

831-
#ifdef DEFERRED_ALLOC
831+
#if DEFERRED_ALLOC
832832
/* Data of the consumer view has not been allocated
833833
* yet. If we are the only producer, we further defer
834834
* allocation until the producer gets ready. Otherwise
@@ -970,7 +970,7 @@ __attribute__((__optimize__("O1"))) static void worker_thread(void) {
970970
wqueue_counters_enter_runtime(current_thread);
971971
inc_wqueue_counter(&cthread->tasks_executed, 1);
972972
} else {
973-
#ifndef WS_NO_YIELD_SPIN
973+
#if WS_NO_YIELD_SPIN
974974
sched_yield();
975975
#endif
976976
}
@@ -1188,15 +1188,15 @@ void pre_main()
11881188

11891189
init_transfer_matrix();
11901190

1191-
#ifdef HWLOC_VERBOSE
1191+
#if HWLOC_VERBOSE
11921192
fprintf(stdout, "\n[HWLOC Info] Using %u workers distributed as:\n", wstream_num_workers);
11931193
#endif
11941194
hwloc_obj_t *processor_mapping = NULL;
11951195
if (!distribute_worker_on_topology(wstream_num_workers, &processor_mapping)) {
11961196
wstream_df_error("[hwloc] Warning: could distribute workers on %d CPUs\n",
11971197
wstream_num_workers);
11981198
}
1199-
#ifdef HWLOC_VERBOSE
1199+
#if HWLOC_VERBOSE
12001200
fprintf(stdout, "\n[HWLOC Info] Worker placement topology:\n");
12011201
print_topology_tree(stdout);
12021202
#endif
@@ -1532,7 +1532,7 @@ broadcast (void *v)
15321532
bt->refcount++;
15331533
#endif
15341534
} else {
1535-
#ifdef DEFERRED_ALLOC
1535+
#if DEFERRED_ALLOC
15361536
if(!peek_view->data)
15371537
__built_in_wstream_df_alloc_view_data(peek_view, peek_view->horizon);
15381538
#endif

0 commit comments

Comments
 (0)