Skip to content

Commit 18dfcd7

Browse files
authored
Add Space Filling Curve
Add Space Filling Curve as a load balance option and clean up some of the other files removing typos and removing more references to deleted code.
1 parent 48e8fc0 commit 18dfcd7

File tree

15 files changed

+912
-488
lines changed

15 files changed

+912
-488
lines changed

ref/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ EXEC = miniAMR.x
99

1010
OBJS = block.o check_sum.o comm_block.o comm.o comm_parent.o comm_refine.o \
1111
comm_util.o driver.o init.o main.o move.o pack.o plot.o profile.o \
12-
rcb.o refine.o stencil.o util.o
12+
rcb.o refine.o sfc.o stencil.o util.o
1313

1414
$(EXEC): $(OBJS)
1515
$(LD) $(LDFLAGS) -o $@ $(OBJS) $(LDLIBS)
@@ -54,6 +54,8 @@ rcb.o: block.h comm.h proto.h timer.h
5454

5555
refine.o: block.h comm.h proto.h timer.h
5656

57+
sfc.o: block.h comm.h proto.h timer.h
58+
5759
stencil.o: block.h comm.h proto.h
5860

5961
util.o: block.h comm.h proto.h timer.h

ref/README

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,18 @@ The list of arguments and their defaults is as follows:
7878

7979
--reorder - ordering of blocks
8080
This controls whether the blocks are ordered by the RCB algorithm
81-
or by a natural ordering of the processors. The default is 1 which
82-
selects the RCB ordering and the natural ordering is 0.
81+
or by a natural ordering of the processors. A setting of 1 selects
82+
the RCB ordering and the natural ordering is 0. The default depends
83+
on which load balance algorithm is chosen. If the RCB algorithm is
84+
chosen then the default is the RCB ordering and if the space filling
85+
curve algorithm is chosen then the default is the natural ordering.
86+
87+
--rcb or --sfc - chooses the algorithm for load balancing
88+
These two options choose the load balance algorithm. The Recursive
89+
Coordinate Bisection (RCB) algorithm is the default, but the option
90+
is included for completeness. The other option is the Space Filling
91+
Curve (SFC). This option is based on a Morton style space filling
92+
curve.
8393

8494
--npx - number of processors in the x direction
8595
--npy - number of processors in the y direction

ref/block.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ void split_blocks(void)
9393
num_refined++;
9494
pp = &parents[p];
9595
pp->number = bp->number;
96+
pp->num_prime = bp->num_prime;
9697
pp->level = bp->level;
9798
pp->parent = bp->parent;
9899
pp->parent_node = bp->parent_node;
@@ -138,6 +139,7 @@ void split_blocks(void)
138139
(p2[level+1]*npy*init_block_y) +
139140
(2*yp+j1))*(p2[level+1]*npx*init_block_x) +
140141
2*xp + i1 + block_start[level+1];
142+
bp1->num_prime = bp->num_prime + o*p8[num_refine - level - 1];
141143
add_sorted_list(m, bp1->number, (level+1));
142144
bp1->cen[0] = bp->cen[0] +
143145
(2*i1 - 1)*p2[num_refine - level - 1];
@@ -375,6 +377,7 @@ void consolidate_blocks(void)
375377
local_num_blocks[level]++;
376378
local_num_blocks[level+1] -= 8;
377379
bp->number = pp->number;
380+
bp->num_prime = pp->num_prime;
378381
pp->number = -1;
379382
bp->level = pp->level;
380383
bp->parent = pp->parent;

ref/block.h

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ typedef long long num_sz;
2929

3030
typedef struct {
3131
num_sz number;
32+
num_sz num_prime;
3233
int level;
3334
int refine;
3435
int new_proc;
@@ -47,6 +48,7 @@ block *blocks;
4748

4849
typedef struct {
4950
num_sz number;
51+
num_sz num_prime;
5052
int level;
5153
num_sz parent; // -1 if original block
5254
int parent_node;
@@ -73,38 +75,40 @@ int max_num_blocks;
7375
int num_refine;
7476
int uniform_refine;
7577
int x_block_size, y_block_size, z_block_size;
76-
int num_cells;
7778
int num_vars;
78-
int mat;
7979
int comm_vars;
8080
int init_block_x, init_block_y, init_block_z;
8181
int reorder;
8282
int npx, npy, npz;
8383
int inbalance;
8484
int refine_freq;
8585
int report_diffusion;
86-
int checksum_freq;
87-
int stages_per_ts;
8886
int error_tol;
8987
int num_tsteps;
88+
int use_time;
89+
double end_time;
90+
int stages_per_ts;
91+
int checksum_freq;
9092
int stencil;
9193
int report_perf;
9294
int plot_freq;
95+
int num_objects;
9396
int lb_opt;
9497
int block_change;
9598
int code;
9699
int permute;
97100
int nonblocking;
98101
int refine_ghost;
99-
int use_time;
100-
double end_time;
101-
int send_faces;
102102
int change_dir;
103103
int group_blocks;
104104
int limit_move;
105+
int send_faces;
106+
int use_rcb;
107+
105108
int first;
106109
int *dirs;
107-
110+
int num_cells;
111+
int mat;
108112
int max_num_parents;
109113
int num_parents;
110114
int max_active_parent;
@@ -130,7 +134,6 @@ double total_fp_divs;
130134
double total_fp_adds;
131135
double total_fp_muls;
132136

133-
int num_objects;
134137
typedef struct {
135138
int type;
136139
int bounce;
@@ -148,10 +151,18 @@ int num_dots;
148151
int max_num_dots;
149152
int max_active_dot;
150153
typedef struct {
151-
int cen[3];
152154
num_sz number;
153155
int n;
154156
int proc;
155157
int new_proc;
158+
int cen[3];
156159
} dot;
157160
dot *dots;
161+
typedef struct {
162+
num_sz number;
163+
num_sz num_prime;
164+
int n;
165+
int proc;
166+
int new_proc;
167+
} spot;
168+
spot *spots;

ref/driver.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void driver(void)
7979
comm(start, number, comm_stage);
8080
t4 = timer();
8181
timer_comm_all += t4 - t3;
82-
for (var = start; var < (start+number); var ++) {
82+
for (var = start; var < (start+number); var++) {
8383
stencil_driver(var, calc_stage);
8484
t3 = timer();
8585
timer_calc_all += t3 - t4;
@@ -120,11 +120,12 @@ void driver(void)
120120
delta = calc_time_step();
121121
if (sim_time >= end_time)
122122
done = 1;
123-
else
124-
sim_time += delta;
125123
} else
126124
if (ts >= num_tsteps)
127125
done = 1;
126+
127+
if (!done)
128+
sim_time += delta;
128129
}
129130

130131
end_time = sim_time;
@@ -160,7 +161,7 @@ double calc_time_step(void)
160161
}
161162
if (done)
162163
break;
163-
for (done = dir = 0; dir < 3; dir++) {
164+
for (dir = 0; dir < 3; dir++) {
164165
tmp = (fabs(op->move[dir]) + fabs(op->inc[dir]))*inv_cell_size[dir];
165166
if (tmp > delta)
166167
delta = tmp;

ref/init.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,13 @@ void init(void)
200200
max_mesh_size = mesh_size[2];
201201
if ((num_pes+1) > max_mesh_size)
202202
max_mesh_size = num_pes + 1;
203-
bin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
204-
gbin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
203+
if (use_rcb) {
204+
bin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
205+
gbin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
206+
} else {
207+
bin = (int *) ma_malloc(global_active*sizeof(int), __FILE__, __LINE__);
208+
gbin = (int *) ma_malloc(global_active*sizeof(int), __FILE__, __LINE__);
209+
}
205210
if (stencil == 7)
206211
f = 0;
207212
else
@@ -217,6 +222,7 @@ void init(void)
217222
bp = &blocks[o];
218223
bp->level = 0;
219224
bp->number = n;
225+
bp->num_prime = n*p8[num_refine];
220226
bp->parent = -1;
221227
bp->parent_node = my_pe;
222228
bp->cen[0] = i1*size + size/2;

ref/main.c

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
int main(int argc, char** argv)
3838
{
3939
int i, ierr, object_num;
40-
int params[38];
40+
int params[39];
4141
double *objs;
4242
#include "param.h"
4343

@@ -126,6 +126,10 @@ int main(int argc, char** argv)
126126
limit_move = atoi(argv[++i]);
127127
else if (!strcmp(argv[i], "--send_faces"))
128128
send_faces = 1;
129+
else if (!strcmp(argv[i], "--rcb"))
130+
use_rcb = 1; // default, but included for completeness
131+
else if (!strcmp(argv[i], "--sfc"))
132+
use_rcb = 0;
129133
else if (!strcmp(argv[i], "--num_objects")) {
130134
num_objects = atoi(argv[++i]);
131135
objects = (object *) ma_malloc(num_objects*sizeof(object),
@@ -165,6 +169,9 @@ int main(int argc, char** argv)
165169
MPI_Abort(MPI_COMM_WORLD, -1);
166170
}
167171

172+
if (reorder == -1)
173+
reorder = use_rcb;
174+
168175
if (check_input())
169176
MPI_Abort(MPI_COMM_WORLD, -1);
170177

@@ -209,8 +216,9 @@ int main(int argc, char** argv)
209216
params[35] = group_blocks;
210217
params[36] = limit_move;
211218
params[37] = send_faces;
219+
params[38] = use_rcb;
212220

213-
MPI_Bcast(params, 38, MPI_INT, 0, MPI_COMM_WORLD);
221+
MPI_Bcast(params, 39, MPI_INT, 0, MPI_COMM_WORLD);
214222

215223
objs = (double *) ma_malloc(14*num_objects*sizeof(double),
216224
__FILE__, __LINE__);
@@ -234,7 +242,7 @@ int main(int argc, char** argv)
234242
MPI_Bcast(objs, (14*num_objects), MPI_DOUBLE, 0, MPI_COMM_WORLD);
235243
free(objs);
236244
} else {
237-
MPI_Bcast(params, 38, MPI_INT, 0, MPI_COMM_WORLD);
245+
MPI_Bcast(params, 39, MPI_INT, 0, MPI_COMM_WORLD);
238246
max_num_blocks = params[ 0];
239247
num_refine = params[ 1];
240248
uniform_refine = params[ 2];
@@ -273,6 +281,7 @@ int main(int argc, char** argv)
273281
group_blocks = params[35];
274282
limit_move = params[36];
275283
send_faces = params[37];
284+
use_rcb = params[38];
276285

277286
objects = (object *) ma_malloc(num_objects*sizeof(object),
278287
__FILE__, __LINE__);
@@ -366,6 +375,8 @@ void print_help_message(void)
366375
printf("--group_blocks - change the RCB algorithm so that a group of blocks with the same center all get put onto the same side of a cut\n");
367376
printf("--limit_move - limit the number of blocks that can be moved during load balance (number that is a percentage of the total number of blocks)\n");
368377
printf("--send_faces - send each face individually instead of packing all faces going to a rank together\n");
378+
printf("--rcb - use RCB algorithm for load balancing (default)\n");
379+
printf("--sfc - use Space Filling Curve algorithm for load balancing\n");
369380
printf("--num_objects - (>= 0) number of objects to cause refinement\n");
370381
printf("--object - type, position, movement, size, size rate of change\n");
371382

@@ -420,9 +431,15 @@ void allocate(void)
420431
parents[n].number = -1;
421432

422433
max_num_dots = 2*max_num_blocks; // Guess at number needed
423-
dots = (dot *) ma_malloc(max_num_dots*sizeof(dot), __FILE__, __LINE__);
424-
for (n = 0; n < max_num_dots; n++)
425-
dots[n].number = -1;
434+
if (use_rcb) {
435+
dots = (dot *) ma_malloc(max_num_dots*sizeof(dot), __FILE__, __LINE__);
436+
for (n = 0; n < max_num_dots; n++)
437+
dots[n].number = -1;
438+
} else {
439+
spots = (spot *) ma_malloc(max_num_dots*sizeof(spot), __FILE__, __LINE__);
440+
for (n = 0; n < max_num_dots; n++)
441+
spots[n].number = -1;
442+
}
426443

427444
grid_sum = (double *)ma_malloc(num_vars*sizeof(double), __FILE__, __LINE__);
428445

@@ -529,8 +546,8 @@ void allocate(void)
529546
if (num_refine) {
530547
s_buf_size = (int) (0.10*((double)max_num_blocks))*comm_vars*
531548
(x_block_size+2)*(y_block_size+2)*(z_block_size+2);
532-
if (s_buf_size < (num_vars*x_block_size*y_block_size*z_block_size + 47))
533-
s_buf_size = num_vars*x_block_size*y_block_size*z_block_size + 47;
549+
if (s_buf_size < (num_vars*x_block_size*y_block_size*z_block_size + 49))
550+
s_buf_size = num_vars*x_block_size*y_block_size*z_block_size + 49;
534551
r_buf_size = 5*s_buf_size;
535552
} else {
536553
i = init_block_x*(x_block_size+2);

ref/pack.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ void pack_block(int n)
4747
send_ll[1] = (long long) (-2 - bp->parent);
4848
else
4949
send_ll[1] = (long long) bp->parent;
50-
l = 4;
50+
send_ll[2] = (long long) bp->num_prime;
51+
l = 6;
5152
send_int[l++] = bp->level;
5253
send_int[l++] = bp->refine;
5354
send_int[l++] = bp->parent_node;
@@ -81,7 +82,8 @@ void unpack_block(int n)
8182

8283
bp->number = (num_sz) recv_ll[0];
8384
bp->parent = (num_sz) recv_ll[1];
84-
l = 4;
85+
bp->num_prime = (num_sz) recv_ll[2];
86+
l = 6;
8587
bp->level = recv_int[l++];
8688
bp->refine = recv_int[l++];
8789
bp->parent_node = recv_int[l++];

ref/param.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ comm_vars = 0;
3535
init_block_x = 1;
3636
init_block_y = 1;
3737
init_block_z = 1;
38-
reorder = 1;
38+
reorder = -1;
3939
npx = 1;
4040
npy = 1;
4141
npz = 1;
@@ -49,7 +49,7 @@ end_time = 0.0;
4949
stages_per_ts = 20;
5050
checksum_freq = 5;
5151
stencil = 7;
52-
report_perf = 4;
52+
report_perf = 12;
5353
plot_freq = 0;
5454
num_objects = 0;
5555
lb_opt = 1;
@@ -62,3 +62,4 @@ change_dir = 0;
6262
group_blocks = 0;
6363
limit_move = 0;
6464
send_faces = 0;
65+
use_rcb = 1;

0 commit comments

Comments
 (0)