Merge pull request #98 from CaNS-World/fix-cons-mom

Fix mass-consistent advection.
CaNS-World · Mar 4, 2025 · ff75500 · ff75500
2 parents dd4fe06 + 96ef73a
commit ff75500
Show file tree

Hide file tree

Showing 60 changed files with 649 additions and 612 deletions.
diff --git a/build.conf b/build.conf
@@ -14,7 +14,8 @@ TIMING=1                   # best = 1
 PENCIL_AXIS=1              # = 1/2/3 for X/Y/Z-aligned pencils
 SINGLE_PRECISION=0         # perform the whole calculation in single precision
 CONSTANT_COEFFS_POISSON=1
+CONSISTENT_ADVECTION=1
 #
 # GPU-related
 #
-GPU=0
+GPU=0
diff --git a/configs/flags.mk b/configs/flags.mk
@@ -135,8 +135,8 @@ ifeq ($(strip $(CONSTANT_COEFFS_POISSON)),1)
 DEFINES += -D_CONSTANT_COEFFS_POISSON
 endif
 
-ifeq ($(strip $(CONSERVATIVE_MOMENTUM)),1)
-DEFINES += -D_CONSERVATIVE_MOMENTUM
+ifeq ($(strip $(CONSISTENT_ADVECTION)),1)
+DEFINES += -D_CONSISTENT_ADVECTION
 endif
 
 ifeq ($(strip $(SPLIT_VISCOUS_DIFUSSION)),1)

diff --git a/dependencies/cuDecomp b/dependencies/cuDecomp
diff --git a/dependencies/external.mk b/dependencies/external.mk
@@ -4,10 +4,10 @@
 ifeq ($(strip $(GPU)),1)
 libs: $(wildcard $(LIBS_DIR)/2decomp-fft/src/*.f90)
 	cd $(LIBS_DIR)/2decomp-fft && make
-	cd $(LIBS_DIR)/cuDecomp && make lib -j
+	cd $(LIBS_DIR)/cuDecomp && mkdir -p build && cd build && cmake .. && make -j
 libsclean: $(wildcard $(LIBS_DIR)/2decomp-fft/src/*.f90)
 	cd $(LIBS_DIR)/2decomp-fft && make clean
-	cd $(LIBS_DIR)/cuDecomp && make clean
+	cd $(LIBS_DIR)/cuDecomp/build && make clean; cd .. && rm -rf build
 else
 libs: $(wildcard $(LIBS_DIR)/2decomp-fft/src/*.f90)
 	cd $(LIBS_DIR)/2decomp-fft && make

diff --git a/docs/INFO_COMPILING.md b/docs/INFO_COMPILING.md
@@ -34,7 +34,7 @@ In this file, `FCOMP` can be one of `GNU` (`gfortran`), `INTEL` (`ifort`), `NVID
  * `PENCIL_AXIS`              : sets the default pencil direction, one of [1,2,3] for [X,Y,Z]-aligned pencils; X-aligned is the default and should be optimal for all cases except for Z implicit diffusion, where using Z-pencils is recommended
  * `SINGLE_PRECISION`         : calculation will be carried out in single precision (the default precision is double)
  * `CONSTANT_COEFFS_POISSON`  : enables the use of a direct FFT solver for the pressure Poisson equation (if set to 0, an iterative multigrid solver based on the HYPRE library will be used. This option is only available for CPU compilation)
- * `CONSERVATIVE_MOMENTUM`    : solves the momentum equation with advective terms in divergence form
+ * `CONSISTENT_ADVECTION`     : solves the momentum equation with advective terms in divergence form
  * `SCALAR`                   : enables the transport equation for a scalar field (e.g., temperature)
  * `BOUSSINESQ_BUOYANCY`      : enables thermal convection within each phase under the Boussinesq approximation
  * `GPU`                      : enables GPU accelerated runs (requires the `FCOMP=NVIDIA`)

diff --git a/docs/INFO_INPUT.md b/docs/INFO_INPUT.md
@@ -8,7 +8,7 @@ Consider the following input file as example (corresponds to a turbulent plane c
 ng(1:3) = 512, 256, 144
 l(1:3) = 6., 3., 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5, dt_f = -1
+cfl = 0.95, dtmax = 1.e5, dt_f = -1
 is_solve_ns = T, is_track_interface = T
 inivel = 'poi'
 is_wallturb = T, is_forced_hit = F
@@ -98,13 +98,13 @@ These lines set the computational grid.
 ---
 
 ```fortran
-cfl = 0.95, dtmin = 1.e5, dt_f = -1
+cfl = 0.95, dtmax = 1.e5, dt_f = -1
 ```
 
 This line controls the simulation time step.
 
-The time step is set to be equal to `min(cfl*dtmax,dtmin)`, i.e. the minimum value between `dtmin` and `cfl` times the maximum allowable time step `dtmax` (computed every `ickeck` time steps; see below).
-`dtmin` is therefore used when a constant time step, smaller than `cfl*dtmax`, is required. If not, it should be set to a high value so that the time step is dynamically adjusted to `cfl*dtmax`. Alternatively, one can force the simulation to advance with a constant time step of arbitrary value, independent of the `dtmax` evaluation, by changing the value of `dt_f` from `-1` to the desired time step.
+The time step is set to the minimum of two values: the user-prescribed upper bound `dtmax`, and `cfl` times the maximum allowable time step evaluated by the solver (computed every `icheck` time steps; see below).
+`dtmax` is therefore used when a constant time step, smaller than `cfl` times the maximum allowable time step, is required. If not, it should be set to a high value so that the time step is dynamically adjusted to `cfl` times the maximum allowable time step. Alternatively, one can force the simulation to advance with a constant time step of arbitrary value, independent of the evaluation of the maximum allowable time step, by changing the value of `dt_f` from `-1` to the desired time step.
 
 ---
 
@@ -197,7 +197,7 @@ These lines set the frequency of time step checking and output:
 * every `iout3d` time steps **3d scalar fields** are written to a file
 * every `isave`  time steps a **checkpoint file** is written (`fld_???????.bin`), and a symbolic link for the restart file, `fld.bin`, will point to this last save so that, by default, the last saved checkpoint file is used to restart the simulation
 
-1d, 2d and 3d outputs can be tweaked modifying files `out?d.h90`, and re-compiling the source. See also `output.f90` for more details.
+1d, 2d and 3d outputs can be tweaked by modifying the files `out?d.h90` and re-compiling the source. See also `output.f90` for more details. _Set any of these variables to `0` to skip the corresponding operation._
 
 ---
 
@@ -314,10 +314,13 @@ inipsi          = 'bub3'
 * `drp1`: planar film (heavier phase)
 * `drp2`: two-dimensional (heavier phase) droplets
 * `drp3`: three-dimensional (heavier phase) droplets
+* `dis1`: planar film (same as `drp1`)
+* `dis2`: two-dimensional dispersed circles (same as `drp2`)
+* `dis3`: three-dimensional dispersed spheres (same as `drp3`)
 * `cap-wav-1d`: planar small-amplitude capillary wave
 * `zalesak-disk`: two-dimensional (lighter phase) slotted disk
 
-See `two_fluid.f90` for more details. For the `bub[1-3]` and `drp[1-3]` initial fields, the position and size of the films/bubbles/droplets can be specified by a `spheres.in` file, where each line corresponds to an individual spherical/circular/planar bubble/droplet, specifying the cartesian coordinates of its center of mass, and its radius.
+See `two_fluid.f90` for more details. For the `bub[1-3]`, `drp[1-3]`, and `dis[1-3]` initial fields, the position and size of the films/bubbles/droplets can be specified by a `spheres.in` file, where each line corresponds to an individual spherical/circular/planar bubble/droplet, specifying the cartesian coordinates of its center of mass, and its radius. **Note:** _The recommended way to initialize a dispersed multiphase flow is to use `dis?`, with phase 1 being the dispersed phase, and phase 2 the continuous one. This is important because `inivel` above considers phase 2 as the continuous one._
 For example, the following file introduces in the computational domain two bubbles/droplets, the first centered at `[x,y,z] = [0.3,1.,0.5]` with a radius of `0.25`, and the second centered at `[1.5,0.6,1.]` with radius `0.5`.
 
 ```fortran

diff --git a/examples/_CaNS-example-files/_manuscript_lid_driven_cavity/input.nml b/examples/_CaNS-example-files/_manuscript_lid_driven_cavity/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 128, 128, 128
 l(1:3) = 1., 1., 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'zer'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/_manuscript_taylor_green_vortex/input.nml b/examples/_CaNS-example-files/_manuscript_taylor_green_vortex/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 512, 512, 512
 l(1:3) =  6.2831853071795, 6.283185307179586, 6.283185307179586
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1600.
 inivel = 'tgv'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/_manuscript_turbulent_channel/input.nml b/examples/_CaNS-example-files/_manuscript_turbulent_channel/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 512, 256, 144
 l(1:3) = 6., 3., 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 5640.
 inivel = 'poi'
 is_wallturb = T
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = T, F, F
 velf(1:3) = 1., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/_manuscript_turbulent_duct/input.nml b/examples/_CaNS-example-files/_manuscript_turbulent_duct/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 512, 128, 128
 l(1:3) = 10., 1., 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 4410.
 inivel = 'poi'
 is_wallturb = T
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = T, F, F
 velf(1:3) = 1., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/closed_box/input.nml b/examples/_CaNS-example-files/closed_box/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 1., 1., 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'zer'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/couette/input.nml b/examples/_CaNS-example-files/couette/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 1., 1.5, 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'cou'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/developing_channel/input.nml b/examples/_CaNS-example-files/developing_channel/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 1., 1.5, 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'zer'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/developing_duct/input.nml b/examples/_CaNS-example-files/developing_duct/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 1., 1.5, 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'zer'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/half_channel/input.nml b/examples/_CaNS-example-files/half_channel/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 1., 1.5, 1.
 gtype = 2, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'hcl'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = T, F, F
 velf(1:3) = 1., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/lid_driven_cavity/input.nml b/examples/_CaNS-example-files/lid_driven_cavity/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 1., 1., 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'zer'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/periodic_channel/input.nml b/examples/_CaNS-example-files/periodic_channel/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 3., 1.5, 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'log'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = T, F, F
 velf(1:3) = 1., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/periodic_duct/input.nml b/examples/_CaNS-example-files/periodic_duct/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 64, 64, 64
 l(1:3) = 3., 1.5, 1.
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 1000.
 inivel = 'log'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = T, F, F
 velf(1:3) = 1., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/taylor_green_vortex_2d/input.nml b/examples/_CaNS-example-files/taylor_green_vortex_2d/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 32, 32, 4
 l(1:3) = 6.2831853071795, 6.283185307179586, 0.125
 gtype = 1, gr = 0.
-cfl = 0.95, dtmin = 0.1
+cfl = 0.95, dtmax = 0.1
 visci = 100.
 inivel = 'tgw'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 1, 1
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
diff --git a/examples/_CaNS-example-files/temporal_boundary_layer/input.nml b/examples/_CaNS-example-files/temporal_boundary_layer/input.nml
@@ -2,7 +2,7 @@
 ng(1:3) = 128, 128, 256
 l(1:3) = 40., 20., 72.
 gtype = 2, gr = 4.
-cfl = 0.95, dtmin = 1.e5
+cfl = 0.95, dtmax = 1.e5
 visci = 500.
 inivel = 'tbl'
 is_wallturb = F
@@ -22,9 +22,9 @@ bforce(1:3) = 0., 0., 0.
 is_forced(1:3) = F, F, F
 velf(1:3) = 0., 0., 0.
 dims(1:2) = 0, 0
-\
+/
 
 &cudecomp
 cudecomp_t_comm_backend = 0, cudecomp_is_t_enable_nccl = T, cudecomp_is_t_enable_nvshmem = T
 cudecomp_h_comm_backend = 0, cudecomp_is_h_enable_nccl = T, cudecomp_is_h_enable_nvshmem = T
-\
+/
+15 −7		CMakeLists.txt
+0 −122		Makefile
+4 −18		README.md
+1 −1		benchmark/CMakeLists.txt
+0 −28		benchmark/Makefile
+48 −27		benchmark/benchmark.cu
+0 −18		configs/nvhpcsdk.conf
+0 −28		configs/nvhpcsdk_marconi100.conf
+0 −23		configs/nvhpcsdk_pm.conf
+0 −27		configs/nvhpcsdk_summit_spectrum.conf
+2 −0		docs/api/f_api.rst
+30 −0		docs/basic_usage.rst
+8 −0		docs/env_vars.rst
+0 −20		examples/Makefile
+0 −17		examples/cc/basic_usage/Makefile
+0 −8		examples/cc/taylor_green/Makefile
+210 −31		examples/cc/taylor_green/tg.cu
+0 −11		examples/fortran/basic_usage/Makefile
+0 −5		examples/fortran/poisson/Makefile
+4 −1		examples/fortran/poisson/poisson.f90
+23 −0		examples/fortran/taylor_green/CMakeLists.txt
+839 −0		examples/fortran/taylor_green/tg.f90
+24 −17		include/cudecomp.h
+17 −0		include/internal/checks.h
+8 −8		include/internal/comm_routines.h
+124 −60		include/internal/common.h
+64 −0		include/internal/cuda_wrap.h
+7 −0		include/internal/exceptions.h
+54 −75		include/internal/halo.h
+59 −0		include/internal/hashes.h
+57 −0		include/internal/nvml_wrap.h
+254 −200		include/internal/transpose.h
+63 −103		src/autotune.cc
+77 −0		src/cuda_wrap.cc
+255 −13		src/cudecomp.cc
+23 −2		src/cudecomp_m.cuf
+76 −0		src/nvml_wrap.cc
+0 −74		tests/Makefile
+17 −1		tests/cc/halo_test.cc
+47 −11		tests/cc/transpose_test.cc
+37 −25		tests/fortran/halo_test.f90
+54 −33		tests/fortran/transpose_test.f90
+73 −28		tests/test_config.yaml
+14 −0		tests/test_runner.py