Skip to content

Commit

Permalink
Merge pull request #98 from CaNS-World/fix-cons-mom
Browse files Browse the repository at this point in the history
Fix mass-consistent advection.
  • Loading branch information
p-costa authored Mar 4, 2025
2 parents dd4fe06 + 96ef73a commit ff75500
Show file tree
Hide file tree
Showing 60 changed files with 649 additions and 612 deletions.
3 changes: 2 additions & 1 deletion build.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ TIMING=1 # best = 1
PENCIL_AXIS=1 # = 1/2/3 for X/Y/Z-aligned pencils
SINGLE_PRECISION=0 # perform the whole calculation in single precision
CONSTANT_COEFFS_POISSON=1
CONSISTENT_ADVECTION=1
#
# GPU-related
#
GPU=0
GPU=0
4 changes: 2 additions & 2 deletions configs/flags.mk
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ ifeq ($(strip $(CONSTANT_COEFFS_POISSON)),1)
DEFINES += -D_CONSTANT_COEFFS_POISSON
endif

ifeq ($(strip $(CONSERVATIVE_MOMENTUM)),1)
DEFINES += -D_CONSERVATIVE_MOMENTUM
ifeq ($(strip $(CONSISTENT_ADVECTION)),1)
DEFINES += -D_CONSISTENT_ADVECTION
endif

ifeq ($(strip $(SPLIT_VISCOUS_DIFUSSION)),1)
Expand Down
2 changes: 1 addition & 1 deletion dependencies/cuDecomp
Submodule cuDecomp updated 44 files
+15 −7 CMakeLists.txt
+0 −122 Makefile
+4 −18 README.md
+1 −1 benchmark/CMakeLists.txt
+0 −28 benchmark/Makefile
+48 −27 benchmark/benchmark.cu
+0 −18 configs/nvhpcsdk.conf
+0 −28 configs/nvhpcsdk_marconi100.conf
+0 −23 configs/nvhpcsdk_pm.conf
+0 −27 configs/nvhpcsdk_summit_spectrum.conf
+2 −0 docs/api/f_api.rst
+30 −0 docs/basic_usage.rst
+8 −0 docs/env_vars.rst
+0 −20 examples/Makefile
+0 −17 examples/cc/basic_usage/Makefile
+0 −8 examples/cc/taylor_green/Makefile
+210 −31 examples/cc/taylor_green/tg.cu
+0 −11 examples/fortran/basic_usage/Makefile
+0 −5 examples/fortran/poisson/Makefile
+4 −1 examples/fortran/poisson/poisson.f90
+23 −0 examples/fortran/taylor_green/CMakeLists.txt
+839 −0 examples/fortran/taylor_green/tg.f90
+24 −17 include/cudecomp.h
+17 −0 include/internal/checks.h
+8 −8 include/internal/comm_routines.h
+124 −60 include/internal/common.h
+64 −0 include/internal/cuda_wrap.h
+7 −0 include/internal/exceptions.h
+54 −75 include/internal/halo.h
+59 −0 include/internal/hashes.h
+57 −0 include/internal/nvml_wrap.h
+254 −200 include/internal/transpose.h
+63 −103 src/autotune.cc
+77 −0 src/cuda_wrap.cc
+255 −13 src/cudecomp.cc
+23 −2 src/cudecomp_m.cuf
+76 −0 src/nvml_wrap.cc
+0 −74 tests/Makefile
+17 −1 tests/cc/halo_test.cc
+47 −11 tests/cc/transpose_test.cc
+37 −25 tests/fortran/halo_test.f90
+54 −33 tests/fortran/transpose_test.f90
+73 −28 tests/test_config.yaml
+14 −0 tests/test_runner.py
4 changes: 2 additions & 2 deletions dependencies/external.mk
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
ifeq ($(strip $(GPU)),1)
libs: $(wildcard $(LIBS_DIR)/2decomp-fft/src/*.f90)
cd $(LIBS_DIR)/2decomp-fft && make
cd $(LIBS_DIR)/cuDecomp && make lib -j
cd $(LIBS_DIR)/cuDecomp && mkdir -p build && cd build && cmake .. && make -j
libsclean: $(wildcard $(LIBS_DIR)/2decomp-fft/src/*.f90)
cd $(LIBS_DIR)/2decomp-fft && make clean
cd $(LIBS_DIR)/cuDecomp && make clean
cd $(LIBS_DIR)/cuDecomp/build && make clean; cd .. && rm -rf build
else
libs: $(wildcard $(LIBS_DIR)/2decomp-fft/src/*.f90)
cd $(LIBS_DIR)/2decomp-fft && make
Expand Down
2 changes: 1 addition & 1 deletion docs/INFO_COMPILING.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ In this file, `FCOMP` can be one of `GNU` (`gfortran`), `INTEL` (`ifort`), `NVID
* `PENCIL_AXIS` : sets the default pencil direction, one of [1,2,3] for [X,Y,Z]-aligned pencils; X-aligned is the default and should be optimal for all cases except for Z implicit diffusion, where using Z-pencils is recommended
* `SINGLE_PRECISION` : calculation will be carried out in single precision (the default precision is double)
* `CONSTANT_COEFFS_POISSON` : enables the use of a direct FFT solver for the pressure Poisson equation (if set to 0, an iterative multigrid solver based on the HYPRE library will be used. This option is only available for CPU compilation)
* `CONSERVATIVE_MOMENTUM` : solves the momentum equation with advective terms in divergence form
* `CONSISTENT_ADVECTION` : solves the momentum equation with advective terms in divergence form
* `SCALAR` : enables the transport equation for a scalar field (e.g., temperature)
* `BOUSSINESQ_BUOYANCY` : enables thermal convection within each phase under the Boussinesq approximation
* `GPU` : enables GPU-accelerated runs (requires `FCOMP=NVIDIA`)
Expand Down
15 changes: 9 additions & 6 deletions docs/INFO_INPUT.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Consider the following input file as example (corresponds to a turbulent plane c
ng(1:3) = 512, 256, 144
l(1:3) = 6., 3., 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5, dt_f = -1
cfl = 0.95, dtmax = 1.e5, dt_f = -1
is_solve_ns = T, is_track_interface = T
inivel = 'poi'
is_wallturb = T, is_forced_hit = F
Expand Down Expand Up @@ -98,13 +98,13 @@ These lines set the computational grid.
---

```fortran
cfl = 0.95, dtmin = 1.e5, dt_f = -1
cfl = 0.95, dtmax = 1.e5, dt_f = -1
```

This line controls the simulation time step.

The time step is set to be equal to `min(cfl*dtmax,dtmin)`, i.e. the minimum value between `dtmin` and `cfl` times the maximum allowable time step `dtmax` (computed every `icheck` time steps; see below).
`dtmin` is therefore used when a constant time step, smaller than `cfl*dtmax`, is required. If not, it should be set to a high value so that the time step is dynamically adjusted to `cfl*dtmax`. Alternatively, one can force the simulation to advance with a constant time step of arbitrary value, independent of the `dtmax` evaluation, by changing the value of `dt_f` from `-1` to the desired time step.
The time step is set to be equal to `min(cfl*dt_stable,dtmax)`, i.e. the minimum value between the input parameter `dtmax` and `cfl` times the maximum allowable time step, denoted here as `dt_stable` (computed every `icheck` time steps; see below).
The input `dtmax` is therefore used when a constant time step, smaller than `cfl*dt_stable`, is required. If not, it should be set to a high value so that the time step is dynamically adjusted to `cfl*dt_stable`. Alternatively, one can force the simulation to advance with a constant time step of arbitrary value, independent of the `dt_stable` evaluation, by changing the value of `dt_f` from `-1` to the desired time step.

---

Expand Down Expand Up @@ -197,7 +197,7 @@ These lines set the frequency of time step checking and output:
* every `iout3d` time steps **3d scalar fields** are written to a file
* every `isave` time steps a **checkpoint file** is written (`fld_???????.bin`), and a symbolic link for the restart file, `fld.bin`, will point to this last save so that, by default, the last saved checkpoint file is used to restart the simulation

1d, 2d and 3d outputs can be tweaked modifying files `out?d.h90`, and re-compiling the source. See also `output.f90` for more details.
1d, 2d and 3d outputs can be tweaked by modifying the files `out?d.h90` and re-compiling the source. See also `output.f90` for more details. _Set any of these variables to `0` to skip the corresponding operation._

---

Expand Down Expand Up @@ -314,10 +314,13 @@ inipsi = 'bub3'
* `drp1`: planar film (heavier phase)
* `drp2`: two-dimensional (heavier phase) droplets
* `drp3`: three-dimensional (heavier phase) droplets
* `dis1`: planar film (same as `drp1`)
* `dis2`: two-dimensional dispersed circles (same as `drp2`)
* `dis3`: three-dimensional dispersed spheres (same as `drp3`)
* `cap-wav-1d`: planar small-amplitude capillary wave
* `zalesak-disk`: two-dimensional (lighter phase) slotted disk

See `two_fluid.f90` for more details. For the `bub[1-3]` and `drp[1-3]` initial fields, the position and size of the films/bubbles/droplets can be specified by a `spheres.in` file, where each line corresponds to an individual spherical/circular/planar bubble/droplet, specifying the cartesian coordinates of its center of mass, and its radius.
See `two_fluid.f90` for more details. For the `bub[1-3]`, `drp[1-3]`, and `dis[1-3]` initial fields, the position and size of the films/bubbles/droplets can be specified by a `spheres.in` file, where each line corresponds to an individual spherical/circular/planar bubble/droplet, specifying the Cartesian coordinates of its center of mass, and its radius. **Note:** _The recommended way to initialize a dispersed multiphase flow is to use `dis?`, with phase 1 being the dispersed phase, and phase 2 the continuous one. This is important because `inivel` above considers phase 2 as the continuous one._
For example, the following file introduces two bubbles/droplets into the computational domain, the first centered at `[x,y,z] = [0.3,1.,0.5]` with a radius of `0.25`, and the second centered at `[1.5,0.6,1.]` with a radius of `0.5`.

```fortran
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 128, 128, 128
l(1:3) = 1., 1., 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'zer'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 512, 512, 512
l(1:3) = 6.2831853071795, 6.283185307179586, 6.283185307179586
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1600.
inivel = 'tgv'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 512, 256, 144
l(1:3) = 6., 3., 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 5640.
inivel = 'poi'
is_wallturb = T
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = T, F, F
velf(1:3) = 1., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 512, 128, 128
l(1:3) = 10., 1., 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 4410.
inivel = 'poi'
is_wallturb = T
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = T, F, F
velf(1:3) = 1., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/closed_box/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 1., 1., 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'zer'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/couette/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 1., 1.5, 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'cou'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/developing_channel/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 1., 1.5, 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'zer'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/developing_duct/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 1., 1.5, 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'zer'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/half_channel/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 1., 1.5, 1.
gtype = 2, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'hcl'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = T, F, F
velf(1:3) = 1., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/lid_driven_cavity/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 1., 1., 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'zer'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/periodic_channel/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 3., 1.5, 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'log'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = T, F, F
velf(1:3) = 1., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/periodic_duct/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 64, 64, 64
l(1:3) = 3., 1.5, 1.
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 1000.
inivel = 'log'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = T, F, F
velf(1:3) = 1., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
6 changes: 3 additions & 3 deletions examples/_CaNS-example-files/taylor_green_vortex_2d/input.nml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 32, 32, 4
l(1:3) = 6.2831853071795, 6.283185307179586, 0.125
gtype = 1, gr = 0.
cfl = 0.95, dtmin = 0.1
cfl = 0.95, dtmax = 0.1
visci = 100.
inivel = 'tgw'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 1, 1
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ng(1:3) = 128, 128, 256
l(1:3) = 40., 20., 72.
gtype = 2, gr = 4.
cfl = 0.95, dtmin = 1.e5
cfl = 0.95, dtmax = 1.e5
visci = 500.
inivel = 'tbl'
is_wallturb = F
Expand All @@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
is_forced(1:3) = F, F, F
velf(1:3) = 0., 0., 0.
dims(1:2) = 0, 0
\
/

&cudecomp
cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
\
/
Loading

0 comments on commit ff75500

Please sign in to comment.