Skip to content

Commit 0d933ca

Browse files
committed
Rewrite code handling inout_reuse clauses
This version should be considered the first version with proper performance for inout_reuse. The main changes are: (1) the NUMA node of a view associated with an inout_reuse clause is now determined correctly; (2) the decision whether the data of a view associated with an inout_reuse clause should be copied on a node change is no longer speculative: it is taken when the task is about to be executed, i.e. when the CPU, and hence the NUMA node on which it executes, is known.
1 parent 45c1efa commit 0d933ca

File tree

9 files changed

+366
-177
lines changed

9 files changed

+366
-177
lines changed

gcc/gcc/omp-low.c

+39-4
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,9 @@ typedef struct wstream_df_view
202202
tree wstream_df_view_field_reuse_consumer_view;
203203
tree wstream_df_view_field_refcount;
204204
tree wstream_df_view_field_view_chain_next;
205+
tree wstream_df_view_field_copy_count;
206+
tree wstream_df_view_field_reuse_count;
207+
tree wstream_df_view_field_ignore_count;
205208

206209
bool is_array_view;
207210
tree base_offset;
@@ -2550,8 +2553,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
25502553
view_ref = build_receiver_ref (view, false, ctx);
25512554
ref = build_addr(view_ref, current_function_decl);
25522555
x = build_call_expr (reuse_prepare_data_fn, 1, ref);
2553-
gimplify_stmt (&x, &tseq);
2554-
gsi_insert_seq_before (&diter, tseq, GSI_SAME_STMT);
2556+
gimplify_stmt (&x, ilist);
25552557
}
25562558

25572559
if (v->is_array_view == false)
@@ -3238,16 +3240,31 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
32383240
unshare_expr (view_ref_prematch), v->wstream_df_view_field_view_chain_next, NULL);
32393241
gimplify_assign (ref, null_pointer_node, &datafield_list);
32403242

3241-
/* Set refcount to 0 */
3243+
/* Set refcount to 1 */
32423244
ref = build3 (COMPONENT_REF, TREE_TYPE (v->wstream_df_view_field_refcount),
32433245
unshare_expr (view_ref_prematch), v->wstream_df_view_field_refcount, NULL);
3244-
gimplify_assign (ref, integer_zero_node, &datafield_list);
3246+
gimplify_assign (ref, integer_one_node, &datafield_list);
32453247

32463248
/* Set reuse_source_view to NULL */
32473249
ref = build3 (COMPONENT_REF, TREE_TYPE (v->wstream_df_view_field_reuse_data_view),
32483250
unshare_expr (view_ref_prematch), v->wstream_df_view_field_reuse_data_view, NULL);
32493251
gimplify_assign (ref, null_pointer_node, &datafield_list);
32503252

3253+
/* Set ignore_count to 0 */
3254+
ref = build3 (COMPONENT_REF, TREE_TYPE (v->wstream_df_view_field_ignore_count),
3255+
unshare_expr (view_ref_prematch), v->wstream_df_view_field_ignore_count, NULL);
3256+
gimplify_assign (ref, integer_zero_node, &datafield_list);
3257+
3258+
/* Set reuse_count to 0 */
3259+
ref = build3 (COMPONENT_REF, TREE_TYPE (v->wstream_df_view_field_reuse_count),
3260+
unshare_expr (view_ref_prematch), v->wstream_df_view_field_reuse_count, NULL);
3261+
gimplify_assign (ref, integer_zero_node, &datafield_list);
3262+
3263+
/* Set copy_count to 0 */
3264+
ref = build3 (COMPONENT_REF, TREE_TYPE (v->wstream_df_view_field_copy_count),
3265+
unshare_expr (view_ref_prematch), v->wstream_df_view_field_copy_count, NULL);
3266+
gimplify_assign (ref, integer_zero_node, &datafield_list);
3267+
32513268
/* Set reuse_consumer_view to NULL */
32523269
ref = build3 (COMPONENT_REF, TREE_TYPE (v->wstream_df_view_field_reuse_consumer_view),
32533270
unshare_expr (view_ref_prematch), v->wstream_df_view_field_reuse_consumer_view, NULL);
@@ -7968,6 +7985,24 @@ build_wstream_df_view_type (omp_context *ctx, tree data_type)
79687985
TYPE_NAME (view_t) = name;
79697986

79707987
/* Add fields. */
7988+
name = create_tmp_var_name ("ignore_count");
7989+
type = integer_type_node;
7990+
field = build_decl (gimple_location (ctx->stmt), FIELD_DECL, name, type);
7991+
insert_field_into_struct (view_t, field);
7992+
ret->wstream_df_view_field_ignore_count = field;
7993+
7994+
name = create_tmp_var_name ("reuse_count");
7995+
type = integer_type_node;
7996+
field = build_decl (gimple_location (ctx->stmt), FIELD_DECL, name, type);
7997+
insert_field_into_struct (view_t, field);
7998+
ret->wstream_df_view_field_reuse_count = field;
7999+
8000+
name = create_tmp_var_name ("copy_count");
8001+
type = integer_type_node;
8002+
field = build_decl (gimple_location (ctx->stmt), FIELD_DECL, name, type);
8003+
insert_field_into_struct (view_t, field);
8004+
ret->wstream_df_view_field_copy_count = field;
8005+
79718006
name = create_tmp_var_name ("view_chain_next");
79728007
type = ptr_type_node;
79738008
field = build_decl (gimple_location (ctx->stmt), FIELD_DECL, name, type);

libworkstream_df/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ endif
5454
../install/bin/gcc:
5555
cd .. ; $(MAKE)
5656

57-
libwstream_df.so: wstream_df.c error.c trace.c fibers.c profiling.c work_distribution.c trace_file.c trace_file.h ansi_extras.h convert.c convert.h tsc.c interleave.c numa.c $(WSTREAM_DEPS) $(GCCDEP)
57+
libwstream_df.so: wstream_df.c error.c trace.c fibers.c profiling.c work_distribution.c trace_file.c trace_file.h ansi_extras.h convert.c convert.h tsc.c interleave.c numa.c reuse.c $(WSTREAM_DEPS) $(GCCDEP)
5858
ifeq ($(PAPI_CHECKED),)
5959
@$(MAKE) -C ../extras -f Makefile.papi_enabled
6060
@PAPI_ENABLED=`../extras/papi_enabled` ; \

libworkstream_df/configs/doble.h

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
//#define PUSH_EQUAL_SEQ
4444

4545
#define REUSE_COPY_ON_NODE_CHANGE
46+
//#define REUSE_STOPCOPY
47+
//#define REUSE_STOPCOPY_CHAIN_LENGTH 4
4648
//#define REUSE_DONTCOPY_ON_STEAL
4749

4850
//#define PUSH_ONLY_IF_NOT_STOLEN_AND_CACHE_EMPTY

libworkstream_df/configs/idchire.h

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545

4646
#define TOPOLOGY_AWARE_WORKSTEALING
4747
#define REUSE_COPY_ON_NODE_CHANGE
48+
//#define REUSE_STOPCOPY
49+
//#define REUSE_STOPCOPY_CHAIN_LENGTH 4
4850
//#define REUSE_DONTCOPY_ON_STEAL
4951

5052
//#define PAPI_L1

libworkstream_df/reuse.c

+210
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#include "reuse.h"
2+
#include "alloc.h"
3+
#include "numa.h"
4+
5+
void __built_in_wstream_df_alloc_view_data_slab(wstream_df_view_p view, size_t size, slab_cache_p slab_cache);
6+
extern __thread wstream_df_thread_p current_thread;
7+
8+
/* Verify that OUT_VIEW and IN_VIEW may be matched with each other.
 *
 * The check passes when IN_VIEW's reached_position is 0 and OUT_VIEW's
 * burst equals IN_VIEW's horizon. Otherwise a diagnostic is written to
 * stderr and the process terminates with exit status 1.
 */
void reuse_view_sanity_check(wstream_df_view_p out_view, wstream_df_view_p in_view)
{
	/* Both requirements are met: nothing to report */
	if(in_view->reached_position == 0 && out_view->burst == in_view->horizon)
		return;

	fprintf(stderr,
		"Reading from an inout_reuse clause or writing to it requires the burst "
		"output clause to be equal to the reading clause's horizon.\n");
	exit(1);
}
18+
19+
/* Match the output view OUT_VIEW of a reuse clause with the input view
 * IN_VIEW of another reuse clause.
 *
 * After the sanity check, IN_VIEW is recorded as the consumer of
 * OUT_VIEW, and the view associated with OUT_VIEW becomes the view from
 * which IN_VIEW will obtain its data. One reference is taken on that
 * associated view and on its owning frame.
 */
void match_reuse_output_clause_with_reuse_input_clause(wstream_df_view_p out_view, wstream_df_view_p in_view)
{
	wstream_df_view_p predecessor_view;

	reuse_view_sanity_check(out_view, in_view);

	/* View associated with the output view, i.e. the direct
	 * predecessor of in_view */
	predecessor_view = out_view->reuse_associated_view;

	/* Link both sides with each other */
	out_view->reuse_consumer_view = in_view;
	in_view->reuse_data_view = predecessor_view;

	/* Whether the existing buffer is reused or the data is copied to
	 * a newly allocated buffer is not known yet, so the data pointer
	 * stays unset until the beginning of the execution */
	in_view->data = NULL;

	/* Keep the direct predecessor (frame and view) alive */
	__built_in_wstream_df_inc_frame_ref(predecessor_view->owner, 1);
	__built_in_wstream_df_inc_view_ref(predecessor_view, 1);

	assert(out_view->reuse_associated_view->refcount == 2);
	assert(out_view->reuse_associated_view->refcount > 1);
	assert(((wstream_df_frame_p)out_view->reuse_associated_view->owner)->refcount > 1);
}
39+
40+
/* Match the input view IN_VIEW of a reuse clause with an output view
 * OUT_VIEW.
 *
 * IN_VIEW gets no reuse data view and receives a buffer of
 * in_view->horizon bytes allocated from the slab cache of the calling
 * thread.
 */
void match_reuse_input_clause_with_output_clause(wstream_df_view_p out_view, wstream_df_view_p in_view)
{
	reuse_view_sanity_check(out_view, in_view);

	/* This view does not take its data from another view */
	in_view->reuse_data_view = NULL;

	/* The output clause assumes that it writes to a regular input
	 * clause and therefore needs in_view->data to be a valid pointer.
	 * FIXME: Do not use local slab cache here */
	__built_in_wstream_df_alloc_view_data_slab(in_view, in_view->horizon,
						   current_thread->slab_cache);

	assert(in_view->data);
	assert(reuse_view_has_own_data(in_view));
}
55+
56+
/* Match the output view OUT_VIEW of a reuse clause with an ordinary
 * (non-reuse) input view IN_VIEW.
 *
 * IN_VIEW is recorded as the consumer of OUT_VIEW. IN_VIEW must not be
 * a reuse view, must already have a data buffer and must have a
 * reference count of exactly 1. One reference is taken on the owner of
 * OUT_VIEW and one on the view associated with OUT_VIEW.
 *
 * NOTE(review): the frame reference is taken on out_view->owner while
 * the view reference is taken on out_view->reuse_associated_view;
 * presumably both belong to the same frame -- confirm.
 */
void match_reuse_output_clause_with_input_clause(wstream_df_view_p out_view, wstream_df_view_p in_view)
{
	wstream_df_view_p associated_view;

	reuse_view_sanity_check(out_view, in_view);
	out_view->reuse_consumer_view = in_view;

	/* Preconditions on the ordinary input view */
	assert(!is_reuse_view(in_view));
	assert(in_view->data);
	assert(in_view->refcount == 1);

	/* Increment reference count of the output clause */
	__built_in_wstream_df_inc_frame_ref(out_view->owner, 1);
	associated_view = out_view->reuse_associated_view;
	__built_in_wstream_df_inc_view_ref(associated_view, 1);
}
69+
70+
/* The parameter v is a pointer to the fake output view of the task
71+
* that is to be executed.
72+
*/
73+
void __built_in_wstream_df_reuse_prepare_data(void* v)
74+
{
75+
int force_reuse = 0;
76+
77+
#ifndef REUSE_COPY_ON_NODE_CHANGE
78+
force_reuse = 1;
79+
#endif
80+
81+
wstream_df_thread_p cthread = current_thread;
82+
83+
/* Fake output view of the task to be executed */
84+
wstream_df_view_p out_view = v;
85+
86+
/* Input view of the task to be executed */
87+
wstream_df_view_p in_view = out_view->reuse_associated_view;
88+
89+
/* If we don't read from a reuse view, there's nothing to do as
90+
we already have a local buffer filled with data */
91+
if(!reuse_view_has_reuse_predecessor(in_view))
92+
return;
93+
94+
wstream_df_view_p reuse_data_view = in_view->reuse_data_view;
95+
96+
/* View of a direct consumer of the task to be executed */
97+
wstream_df_view_p consumer_view = out_view->reuse_consumer_view;
98+
99+
if(wstream_is_fresh(reuse_data_view->data) && reuse_data_view->horizon > 10000) {
100+
wstream_update_numa_node_of(reuse_data_view->data);
101+
trace_frame_info(cthread, reuse_data_view->data);
102+
slab_set_max_initial_writer_of(reuse_data_view->data, 0, 0);
103+
}
104+
105+
/* Node of the data if reused */
106+
int reuse_numa_node = wstream_numa_node_of(reuse_data_view->data);
107+
108+
/* Node of the task that is to be executed */
109+
int this_numa_node = cthread->numa_node->id;
110+
111+
/* Copy statistics */
112+
in_view->reuse_count = reuse_data_view->reuse_count;
113+
in_view->copy_count = reuse_data_view->copy_count;
114+
in_view->ignore_count = reuse_data_view->ignore_count;
115+
116+
#ifdef REUSE_STOPCOPY
117+
if(in_view->copy_count + in_view->reuse_count > REUSE_STOPCOPY_CHAIN_LENGTH)
118+
force_reuse = 1;
119+
#endif
120+
121+
/* Migrate data if consumer executes on another than the data is
122+
* located on */
123+
if(reuse_numa_node != this_numa_node && !force_reuse) {
124+
__built_in_wstream_df_alloc_view_data_slab(in_view, in_view->horizon, cthread->slab_cache);
125+
126+
trace_state_change(cthread, WORKER_STATE_RT_INIT);
127+
trace_data_read(cthread, 0, reuse_data_view->horizon, 0, reuse_data_view->data);
128+
129+
memcpy(in_view->data, reuse_data_view->data, in_view->horizon);
130+
131+
trace_data_write(cthread, in_view->horizon, in_view->data);
132+
trace_state_restore(cthread);
133+
134+
in_view->reuse_data_view = NULL;
135+
136+
/* We don't need our predecessor anymore */
137+
__built_in_wstream_df_dec_view_ref(reuse_data_view, 1);
138+
__built_in_wstream_df_dec_frame_ref(reuse_data_view->owner, 1);
139+
140+
in_view->copy_count++;
141+
142+
/* if(in_view->horizon > 10000) */
143+
/* printf("COPY: %d and %d (%d bytes)\n", reuse_numa_node, this_numa_node, in_view->horizon); */
144+
} else {
145+
/* Just reuse the data pointer. There's no need to increment our
146+
* predecessor's reference counter as this was already done
147+
* before.*/
148+
in_view->data = reuse_data_view->data;
149+
reuse_data_view->data = NULL;
150+
151+
in_view->reuse_count++;
152+
153+
/* if(in_view->horizon > 10000) */
154+
/* printf("REUSE\n"); */
155+
}
156+
157+
assert(in_view->data);
158+
assert(in_view->refcount > 0);
159+
}
160+
161+
void __built_in_wstream_df_reuse_update_data(void* v)
162+
{
163+
/* Fake output view of the task that terminates */
164+
wstream_df_view_p out_view = v;
165+
166+
/* Input view of the task that terminates */
167+
wstream_df_view_p in_view = out_view->reuse_associated_view;
168+
169+
/* If we don't have a consumer there's nothing to do */
170+
if(!out_view->reuse_consumer_view)
171+
return;
172+
173+
/* For consumer views that are reuse views we need to propagate
174+
* our producer's view if we have reused its data */
175+
if(is_reuse_view(out_view->reuse_consumer_view)) {
176+
if(!reuse_view_has_own_data(in_view)) {
177+
/* Transfer data ownership to this view */
178+
in_view->reuse_data_view->data = NULL;
179+
180+
/* /\* Set our consumer's reuse data view to our predecessor *\/ */
181+
/* out_view->reuse_consumer_view->reuse_data_view = in_view->reuse_data_view; */
182+
183+
/* /\* Set out predecessor's consumer to our consumer *\/ */
184+
/* in_view->reuse_data_view->reuse_consumer_view = out_view->reuse_consumer_view; */
185+
186+
/* Increase our predecessor's refcount as our consumer potentially
187+
* reuses it */
188+
/* __built_in_wstream_df_inc_view_ref(in_view->reuse_data_view, 1); */
189+
/* __built_in_wstream_df_inc_frame_ref(in_view->reuse_data_view->owner, 1); */
190+
191+
/* Our consumer has speculatively incremented our reference
192+
* counter. All the references from the consumer to us have been
193+
* overwritten, so we can safely decrement out own reference
194+
* counter. */
195+
/* __built_in_wstream_df_dec_view_ref(in_view, 1); */
196+
/* __built_in_wstream_df_dec_frame_ref(in_view->owner, 1); */
197+
198+
__built_in_wstream_df_dec_view_ref(in_view->reuse_data_view, 1);
199+
__built_in_wstream_df_dec_frame_ref(in_view->reuse_data_view->owner, 1);
200+
}
201+
} else {
202+
/* The consumer view is an ordinary input view and we just need to
203+
* copy data to its input buffer. */
204+
memcpy(out_view->reuse_consumer_view->data, in_view->data, in_view->horizon);
205+
206+
/* Decrement our own reference counter */
207+
__built_in_wstream_df_dec_view_ref(in_view, 1);
208+
__built_in_wstream_df_dec_frame_ref(in_view->owner, 1);
209+
}
210+
}

libworkstream_df/reuse.h

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/* Helpers and entry points for views associated with inout_reuse
 * clauses. */
#ifndef REUSE_H
#define REUSE_H

#include "wstream_df.h"

/* Return non-zero if V has an associated reuse view, i.e. if V belongs
 * to a reuse clause. */
static inline int is_reuse_view(wstream_df_view_p v)
{
	return (v->reuse_associated_view != NULL);
}

/* Return non-zero if V takes its data from another view and that view
 * is itself a reuse view. */
static inline int reuse_view_has_reuse_predecessor(wstream_df_view_p v)
{
	return (v->reuse_data_view != NULL && is_reuse_view(v->reuse_data_view));
}

/* Return non-zero if V does not take its data from another view
 * (reuse_data_view is NULL). */
static inline int reuse_view_has_own_data(wstream_df_view_p v)
{
	return !v->reuse_data_view;
}

/* Called with a pointer to the fake output view of a task that is about
 * to be executed. */
void __built_in_wstream_df_reuse_prepare_data(void* v);

/* Called with a pointer to the fake output view of a task that
 * terminates. */
void __built_in_wstream_df_reuse_update_data(void* v);

/* Matching of output views with input views for the different
 * combinations of reuse and ordinary clauses. */
void match_reuse_output_clause_with_reuse_input_clause(wstream_df_view_p out_view, wstream_df_view_p in_view);
void match_reuse_input_clause_with_output_clause(wstream_df_view_p out_view, wstream_df_view_p in_view);
void match_reuse_output_clause_with_input_clause(wstream_df_view_p out_view, wstream_df_view_p in_view);

/* NOTE(review): no definition of check_reuse_copy is visible in
 * reuse.c -- confirm it is defined elsewhere or drop the prototype. */
void check_reuse_copy(wstream_df_frame_p fp);

#endif /* REUSE_H */
25+

0 commit comments

Comments
 (0)