Skip to content

Commit 86c5e48

Browse files
committed
fix memory allocation bug: WABBIT now is more certain to allocate exactly what --memory tells it to
1 parent 10aacfa commit 86c5e48

File tree

8 files changed

+160
-192
lines changed

8 files changed

+160
-192
lines changed

LIB/MESH/allocate_grid.f90

Lines changed: 104 additions & 176 deletions
Large diffs are not rendered by default.

LIB/MODULE/module_globals.f90

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ module module_globals
3232
! this parameter is a hack. in most parts of the code, a block has n_eqn component entries.
3333
! universality dictates that we can also use a different number of components, for example
3434
! when sync'ing the mask function (which in many cases has six entries).
35-
integer, public :: N_MAX_COMPONENTS = 6
35+
! New in 06/2021: the hack continues. We now set this parameter at different places
36+
! to save on memory. It can be params%n_eqn (default in simulations) or 6 (if the mask is synced). The new default is 3,
37+
! for postprocessing.
38+
integer, public :: N_MAX_COMPONENTS = 3
3639

3740
!subroutines of this module
3841
interface abort

LIB/MPI/module_mpi.f90

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,9 @@ subroutine init_ghost_nodes( params )
198198
allocate( tmp_block( Bs(1)+2*g, Bs(2)+2*g, 1, Neqn) )
199199
end if
200200

201-
! size of ghost nodes buffer. Note this contains only the ghost nodes layer
202-
! for all my blocks. previous versions allocated one of those per "friend"
201+
! size of ghost nodes buffer. The worst case is that we have to send ALL my ghost node
202+
! points to another rank, but this happens only if ALL my blocks have ONLY neighbors
203+
! on other MPIRANKS.
203204
if ( params%dim==3 ) then
204205
buffer_N = number_blocks * Neqn * ( (Bs(1)+2*g)*(Bs(2)+2*g)*(Bs(3)+2*g) - (Bs(1)*Bs(2)*Bs(3)) )
205206
else
@@ -213,20 +214,14 @@ subroutine init_ghost_nodes( params )
213214
! allocate synch buffer
214215
if (rank==0) then
215216
write(*,'("GHOSTS-INIT: Attempting to allocate the ghost-sync-buffer.")')
216-
217-
write(*,'("GHOSTS-INIT: buffer_N_int=",i12," buffer_N=",i12," Nstages=",i1)') &
218-
buffer_N_int, buffer_N, Nstages
219-
220-
write(*,'("GHOSTS-INIT: On each MPIRANK, Int buffer:", f9.4, "GB")') &
221-
2.0*dble(buffer_N_int)*dble(Nstages)*8e-9
222-
223-
write(*,'("GHOSTS-INIT: On each MPIRANK, Real buffer:", f9.4, "GB")') &
224-
2.0*dble(buffer_N)*dble(Nstages)*8e-9
217+
write(*,'("GHOSTS-INIT: buffer_N_int=",i12," buffer_N=",i12," Nstages=",i1)') buffer_N_int, buffer_N, Nstages
218+
write(*,'("GHOSTS-INIT: Int buffer:", f9.4, " GB per rank")') 2.0*dble(buffer_N_int)*dble(Nstages)*8e-9
219+
write(*,'("GHOSTS-INIT: Real buffer:", f9.4, " GB per rank")') 2.0*dble(buffer_N)*dble(Nstages)*8e-9
225220
write(*,'("---------------- allocating now ----------------")')
226221
endif
227222

228223
! wait now so that if allocation fails, we get at least the above info
229-
call MPI_barrier( WABBIT_COMM, status(1))
224+
call MPI_barrier( WABBIT_COMM, status(1) )
230225

231226
allocate( int_send_buffer( 1:buffer_N_int, 1:Nstages), stat=status(1) )
232227
allocate( int_recv_buffer( 1:buffer_N_int, 1:Nstages), stat=status(2) )
@@ -236,7 +231,6 @@ subroutine init_ghost_nodes( params )
236231
if (maxval(status) /= 0) call abort(999999, "Buffer allocation failed. Not enough memory?")
237232

238233
if (rank==0) then
239-
240234
write(*,'("GHOSTS-INIT: on each mpirank, Allocated ",A25," SHAPE=",7(i9,1x))') &
241235
"new_send_buffer", shape(new_send_buffer)
242236

LIB/MPI/synchronize_ghosts_generic.f90

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ subroutine synchronize_ghosts_generic_sequence( params, lgt_block, hvy_block, hv
3131

3232
integer(kind=ik) :: ijk(2,3)
3333
integer(kind=ik) :: bounds_type, istage, istage_buffer(1:4), rounds(1:4), inverse
34+
logical, save :: informed = .false.
3435

3536

3637
if (.not. ghost_nodes_module_ready) then
@@ -42,8 +43,23 @@ subroutine synchronize_ghosts_generic_sequence( params, lgt_block, hvy_block, hv
4243
! if this mpirank has no active blocks, it has nothing to do here.
4344
if (hvy_n == 0) return
4445

45-
if (size(hvy_block,4)>N_max_components) then
46-
call abort(160720191,"You try to ghost-sync a vector with too many components.")
46+
if (size(hvy_block,4)>N_MAX_COMPONENTS .and. .not. informed) then
47+
if (params%rank ==0) then
48+
write(*,*) "-------------------------------------------------------------------------"
49+
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
50+
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
51+
write(*,*) "-------------------------------------------------------------------------"
52+
write(*,*) " A warning from the ghost nodes module: we have allocated a buffer with an estimation for"
53+
write(*,*) " neqn=", N_MAX_COMPONENTS, " components of a vector, but you try to sync"
54+
write(*,*) " neqn=", size(hvy_block,4), " This may work just fine: but in some (rare) cases, "
55+
write(*,*) " we will see a buffer overflow. The code will then abort with an error, and you have"
56+
write(*,*) " to restart this simulation with more memory."
57+
write(*,*) "-------------------------------------------------------------------------"
58+
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
59+
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
60+
write(*,*) "-------------------------------------------------------------------------"
61+
endif
62+
informed = .true.
4763
endif
4864

4965
Bs = params%Bs
@@ -1053,6 +1069,9 @@ subroutine AppendLineToBuffer( int_send_buffer, new_send_buffer, buffer_size, ne
10531069

10541070
! real data
10551071
if (buffer_size>0) then
1072+
if (i0+buffer_size-1 >= size(new_send_buffer,1)) then
1073+
call abort(202106049, "Internal bug: we ran out of space for the ghost nodes. Restart simulation with more memory.")
1074+
endif
10561075
new_send_buffer( i0:i0+buffer_size-1, istage ) = line_buffer(1:buffer_size)
10571076
endif
10581077

LIB/PARAMS/ini_file_to_params.f90

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@ subroutine ini_file_to_params( params, filename )
104104
call read_param_mpi(FILE, 'VPM', 'mask_time_independent_part', params%mask_time_independent_part, .true.)
105105
call read_param_mpi(FILE, 'VPM', 'dont_use_pruned_tree_mask', params%dont_use_pruned_tree_mask, .false.)
106106

107+
if (params%physics_type == "ACM-new") then
108+
if (params%penalization) then
109+
if ((.not.params%dont_use_pruned_tree_mask).and.(params%mask_time_independent_part)) then
110+
! we sync the mask array in this case, which has 6 components
111+
N_MAX_COMPONENTS = max(6, params%n_eqn)
112+
endif
113+
endif
114+
endif
115+
107116
! decide if we use hartens point value multiresolution transform, which uses a coarsening operator
108117
! that just takes every 2nd grid point or biorthogonal wavlets, which apply a smoothing filter (lowpass)
109118
! prior to downsampling.
@@ -270,6 +279,10 @@ subroutine ini_blocks(params, FILE )
270279
call abort(170619,"Error: Max treelevel cannot be larger 18 (64bit long integer problem) ")
271280
end if
272281

282+
! the default case is that we synchronize (ghosts) with n_eqn components in the vector;
283+
! this may be overwritten (with six) if the pruned tree mask is used
284+
N_MAX_COMPONENTS = params%n_eqn
285+
273286
! read switch to turn on|off mesh refinement
274287
call read_param_mpi(FILE, 'Blocks', 'adapt_mesh', params%adapt_mesh, .true. )
275288
call read_param_mpi(FILE, 'Blocks', 'adapt_inicond', params%adapt_inicond, params%adapt_mesh )

LIB/POSTPROCESSING/module_MOR.f90

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,8 @@ subroutine post_POD(params)
460460
call get_cmd_arg( "--start_from_eigenbasis", eigenbasis_files)
461461
call get_cmd_arg( "--components", n_components, default=1_ik)
462462

463+
N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)
464+
463465
!-------------------------------
464466
! Set some wabbit specific params
465467
!-------------------------------
@@ -751,6 +753,7 @@ subroutine post_PODerror(params)
751753
call get_cmd_arg( "--components", n_components, default=1_ik)
752754
call get_cmd_arg( "--iteration", iteration, default=1_ik)
753755

756+
N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)
754757

755758
if ( iteration>0 ) then
756759
if ( params%rank == 0 ) write(*,*) "Iteration reconstructed: " ,iteration
@@ -1320,6 +1323,7 @@ subroutine post_reconstruct(params)
13201323
call get_cmd_arg( "--iteration", iteration, default=-1_ik)
13211324
call get_cmd_arg( "--nmodes", N_modes_used, default=1_ik)
13221325

1326+
N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)
13231327

13241328
if ( iteration>0 ) then
13251329
save_all = .False.
@@ -1729,6 +1733,8 @@ subroutine post_timecoef_POD(params)
17291733
call get_cmd_arg( "--components", n_components, default=1_ik)
17301734
call get_cmd_arg( "--iteration", iteration, default=1)
17311735

1736+
N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)
1737+
17321738
if ( iteration>0 ) then
17331739
if ( params%rank == 0 ) write(*,*) "Iteration reconstructed: " ,iteration
17341740
endif

LIB/POSTPROCESSING/post_generate_forest.f90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ subroutine post_generate_forest(params)
6262
params%block_distribution = "sfc_hilbert"
6363
params%time_step_method = 'none'
6464

65+
N_MAX_COMPONENTS = params%n_eqn ! used for ghost node sync'ing (buffer allocation)
66+
6567

6668
! we have to allocate grid if this routine is called for the first time
6769
call allocate_forest(params, lgt_block, hvy_block, hvy_neighbor, lgt_active, &

TESTING/runtests.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,10 @@ do
9595
echo "Writing output to: " ${logfile}
9696

9797
# run the actual test
98+
T2="$(date +%s)"
9899
./${ts} > $logfile
100+
T2="$(($(date +%s)-T2))"
101+
echo "Time used in test: ${T2} seconds"
99102

100103
if [ $? == 0 ]; then
101104
printf "%s \n" "${pass_color} pass ${end_color}"

0 commit comments

Comments
 (0)