Skip to content

Commit d56bcbb

Browse files
authored
Shock capturing with nonconservative_terms::True (#46)
* Start * Remove 3D condition * Complete 2D * Complete 3D * Add tests
1 parent cb499a0 commit d56bcbb

6 files changed

+999
-194
lines changed

src/solvers/dg_1d.jl

+37-35
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,41 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
181181
return nothing
182182
end
183183

184+
# Kernel for calculating DG volume integral contribution
#
# Each thread is responsible for one `(variable, node, slot)` triple of `du`. The slot
# index `k` is looked up in both `element_ids_dg` and `element_ids_dgfv`; a zero entry
# disables the corresponding update. For blended (DG-FV) elements the DG contribution
# is weighted by `1 - alpha[k]`.
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                                    volume_flux_arr, equations::AbstractEquations{1})
    v = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    node = (blockIdx().y - 1) * blockDim().y + threadIdx().y
    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

    # Threads that fall outside of `du` have nothing to do
    (v > size(du, 1) || node > size(du, 2) || k > size(du, 3)) && return nothing

    # length(element_ids_dg) == size(du, 3)
    # length(element_ids_dgfv) == size(du, 3)
    id_dg = element_ids_dg[k]     # zero => skip the pure DG update
    id_dgfv = element_ids_dgfv[k] # zero => skip the blended DG-FV update
    blend = alpha[k]

    # NOTE(review): the original author tagged both branches below as "bad",
    # presumably because of thread divergence - confirm before optimizing.
    if id_dg != 0
        @inbounds for q in axes(du, 2)
            du[v, node, id_dg] += derivative_split[node, q] *
                                  volume_flux_arr[v, node, q, id_dg]
        end
    end

    if id_dgfv != 0
        @inbounds for q in axes(du, 2)
            du[v, node, id_dgfv] += (1 - blend) * derivative_split[node, q] *
                                    volume_flux_arr[v, node, q, id_dgfv]
        end
    end

    return nothing
end
218+
184219
# Kernel for calculating pure DG and DG-FV volume fluxes
185220
function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
186221
element_ids_dgfv, derivative_split,
@@ -235,41 +270,6 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, f
235270
return nothing
236271
end
237272

238-
# Kernel for calculating DG volume integral contribution
#
# Each thread is responsible for one `(variable, node, slot)` triple of `du`. The slot
# index `k` is looked up in both `element_ids_dg` and `element_ids_dgfv`; a zero entry
# disables the corresponding update. For blended (DG-FV) elements the DG contribution
# is weighted by `1 - alpha[k]`.
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                                    volume_flux_arr, equations::AbstractEquations{1})
    v = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    node = (blockIdx().y - 1) * blockDim().y + threadIdx().y
    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

    # Threads that fall outside of `du` have nothing to do
    (v > size(du, 1) || node > size(du, 2) || k > size(du, 3)) && return nothing

    # length(element_ids_dg) == size(du, 3)
    # length(element_ids_dgfv) == size(du, 3)
    id_dg = element_ids_dg[k]     # zero => skip the pure DG update
    id_dgfv = element_ids_dgfv[k] # zero => skip the blended DG-FV update
    blend = alpha[k]

    # NOTE(review): the original author tagged both branches below as "bad",
    # presumably because of thread divergence - confirm before optimizing.
    if id_dg != 0
        @inbounds for q in axes(du, 2)
            du[v, node, id_dg] += derivative_split[node, q] *
                                  volume_flux_arr[v, node, q, id_dg]
        end
    end

    if id_dgfv != 0
        @inbounds for q in axes(du, 2)
            du[v, node, id_dgfv] += (1 - blend) * derivative_split[node, q] *
                                    volume_flux_arr[v, node, q, id_dgfv]
        end
    end

    return nothing
end
272-
273273
# Kernel for calculating DG volume integral contribution
274274
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
275275
volume_flux_arr, noncons_flux_arr,
@@ -805,6 +805,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
805805
nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv;
806806
configurator_2d(volume_flux_dgfv_kernel, size_arr)...)
807807

808+
derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split`
809+
808810
volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
809811
element_ids_dgfv,
810812
alpha,

src/solvers/dg_2d.jl

+234
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,148 @@ function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha,
281281
return nothing
282282
end
283283

284+
# Kernel for calculating pure DG and DG-FV volume fluxes
#
# The x-thread index is a flattened `(j1, j2, j3)` node triple (with `j3` running
# fastest) and the y-thread index `k` selects the element. Every thread
#   * evaluates the DG volume flux and the nonconservative DG flux between node
#     `(j1, j2)` and its partner nodes `(j3, j2)` (direction 1) and `(j1, j3)`
#     (direction 2), storing the volume fluxes pre-multiplied by `derivative_split`
#     and the nonconservative fluxes unscaled;
#   * for `j3 == 1` and a nonzero `element_ids_dgfv[k]`, additionally fills the
#     `fstar*_L` / `fstar*_R` arrays from `volume_flux_fv` and `nonconservative_flux_fv`.
function volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
                                  noncons_flux_arr2, fstar1_L, fstar1_R, fstar2_L, fstar2_R,
                                  u, element_ids_dgfv, derivative_split,
                                  equations::AbstractEquations{2},
                                  volume_flux_dg::Any, nonconservative_flux_dg::Any,
                                  volume_flux_fv::Any, nonconservative_flux_fv::Any)
    tid = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

    # Threads beyond the `size(u, 2)^3 x size(u, 4)` index space have nothing to do
    (tid > size(u, 2)^3 || k > size(u, 4)) && return nothing

    # length(element_ids_dgfv) == size(u, 4)
    n = size(u, 2)

    # Unflatten `tid` into the node triple
    rest = rem(tid - 1, n^2)
    j1 = div(tid - 1, n^2) + 1
    j2 = div(rest, n) + 1
    j3 = rem(rest, n) + 1

    blended_id = element_ids_dgfv[k] # a zero entry disables the FV part below

    # The sets of `get_node_vars` operations may be combined
    # into a single set of operation for better performance (to be explored).
    u_here = get_node_vars(u, equations, j1, j2, k)
    u_dir1 = get_node_vars(u, equations, j3, j2, k)
    u_dir2 = get_node_vars(u, equations, j1, j3, k)

    flux1 = volume_flux_dg(u_here, u_dir1, 1, equations)
    flux2 = volume_flux_dg(u_here, u_dir2, 2, equations)

    noncons1 = nonconservative_flux_dg(u_here, u_dir1, 1, equations)
    noncons2 = nonconservative_flux_dg(u_here, u_dir2, 2, equations)

    @inbounds for v in axes(u, 1)
        volume_flux_arr1[v, j1, j3, j2, k] = derivative_split[j1, j3] * flux1[v]
        volume_flux_arr2[v, j1, j2, j3, k] = derivative_split[j2, j3] * flux2[v]
        noncons_flux_arr1[v, j1, j3, j2, k] = noncons1[v]
        noncons_flux_arr2[v, j1, j2, j3, k] = noncons2[v]
    end

    # FV part: only the `j3 == 1` threads of an element with a nonzero id take part.
    # NOTE(review): the original author tagged these branches as "bad", presumably
    # because of thread divergence - confirm before optimizing.
    if j3 == 1 && blended_id != 0
        # Direction 1: flux between nodes `(j1 - 1, j2)` and `(j1, j2)`
        if j1 != 1
            u_ll = get_node_vars(u, equations, j1 - 1, j2, blended_id)
            u_rr = get_node_vars(u, equations, j1, j2, blended_id)

            f1 = volume_flux_fv(u_ll, u_rr, 1, equations)

            f1_L = nonconservative_flux_fv(u_ll, u_rr, 1, equations)
            f1_R = nonconservative_flux_fv(u_rr, u_ll, 1, equations)

            @inbounds for v in axes(u, 1)
                fstar1_L[v, j1, j2, blended_id] = f1[v] + 0.5 * f1_L[v]
                fstar1_R[v, j1, j2, blended_id] = f1[v] + 0.5 * f1_R[v]
            end
        end

        # Direction 2: flux between nodes `(j1, j2 - 1)` and `(j1, j2)`
        if j2 != 1
            u_ll = get_node_vars(u, equations, j1, j2 - 1, blended_id)
            u_rr = get_node_vars(u, equations, j1, j2, blended_id)

            f2 = volume_flux_fv(u_ll, u_rr, 2, equations)

            f2_L = nonconservative_flux_fv(u_ll, u_rr, 2, equations)
            f2_R = nonconservative_flux_fv(u_rr, u_ll, 2, equations)

            @inbounds for v in axes(u, 1)
                fstar2_L[v, j1, j2, blended_id] = f2[v] + 0.5 * f2_L[v]
                fstar2_R[v, j1, j2, blended_id] = f2[v] + 0.5 * f2_R[v]
            end
        end
    end

    return nothing
end
365+
366+
# Kernel for calculating DG volume integral contribution
#
# Thread layout: x -> variable, y -> flattened node pair `(j1, j2)`, z -> slot `k`.
# `volume_flux_arr1/2` are summed into `du` as they are, while `noncons_flux_arr1/2`
# are weighted by `derivative_split` here and enter with a factor of `0.5`.
# A zero entry in `element_ids_dg` / `element_ids_dgfv` disables the respective
# update; for blended elements everything is scaled by `1 - alpha[k]`.
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                                    volume_flux_arr1, volume_flux_arr2,
                                    noncons_flux_arr1, noncons_flux_arr2,
                                    equations::AbstractEquations{2})
    v = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    tid = (blockIdx().y - 1) * blockDim().y + threadIdx().y
    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

    # Threads that fall outside of the index space have nothing to do
    (v > size(du, 1) || tid > size(du, 2)^2 || k > size(du, 4)) && return nothing

    # length(element_ids_dg) == size(du, 4)
    # length(element_ids_dgfv) == size(du, 4)
    n = size(du, 2)
    j1 = div(tid - 1, n) + 1
    j2 = rem(tid - 1, n) + 1

    id_dg = element_ids_dg[k]     # zero => skip the pure DG update
    id_dgfv = element_ids_dgfv[k] # zero => skip the blended DG-FV update
    blend = alpha[k]

    # NOTE(review): the original author tagged both branches below as "bad",
    # presumably because of thread divergence - confirm before optimizing.
    if id_dg != 0
        noncons_sum = 0.0

        @inbounds for q in axes(du, 2)
            du[v, j1, j2, id_dg] += volume_flux_arr1[v, j1, q, j2, id_dg]
            du[v, j1, j2, id_dg] += volume_flux_arr2[v, j1, j2, q, id_dg]

            noncons_sum += derivative_split[j1, q] * noncons_flux_arr1[v, j1, q, j2, id_dg]
            noncons_sum += derivative_split[j2, q] * noncons_flux_arr2[v, j1, j2, q, id_dg]
        end

        @inbounds du[v, j1, j2, id_dg] += 0.5 * noncons_sum
    end

    if id_dgfv != 0
        noncons_sum = 0.0

        @inbounds for q in axes(du, 2)
            du[v, j1, j2, id_dgfv] += (1 - blend) *
                                      volume_flux_arr1[v, j1, q, j2, id_dgfv]
            du[v, j1, j2, id_dgfv] += (1 - blend) *
                                      volume_flux_arr2[v, j1, j2, q, id_dgfv]

            noncons_sum += derivative_split[j1, q] * noncons_flux_arr1[v, j1, q, j2, id_dgfv]
            noncons_sum += derivative_split[j2, q] * noncons_flux_arr2[v, j1, j2, q, id_dgfv]
        end

        @inbounds du[v, j1, j2, id_dgfv] += 0.5 * (1 - blend) * noncons_sum
    end

    return nothing
end
425+
284426
# Kernel for calculating FV volume integral contribution
285427
function volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, fstar2_L, fstar2_R,
286428
inverse_weights, element_ids_dgfv, alpha)
@@ -1050,6 +1192,98 @@ end
10501192
# Pack kernels for calculating volume integrals
#
# 2D shock-capturing volume integral (`VolumeIntegralShockCapturingHG`) for equations
# with nonconservative terms. `du` is updated in place and `nothing` is returned.
# Steps, in launch order:
#   1. evaluate the indicator on a host copy of `u` and upload the result as `alpha`;
#   2. `pure_blended_element_count_kernel!` fills `element_ids_dg` / `element_ids_dgfv`;
#   3. `volume_flux_dgfv_kernel!` fills the DG volume / nonconservative flux arrays and
#      the `fstar*` arrays, using `derivative_split` with a zeroed diagonal;
#   4. `volume_integral_dg_kernel!` updates `du`, using the unmodified `derivative_split`;
#   5. `volume_integral_fv_kernel!` updates `du` from the `fstar*` arrays.
function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::True, equations,
                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
    volume_flux_dg, nonconservative_flux_dg = dg.volume_integral.volume_flux_dg
    volume_flux_fv, nonconservative_flux_fv = dg.volume_integral.volume_flux_fv
    indicator = dg.volume_integral.indicator

    # TODO: Get copies of `u` and `du` on both device and host
    alpha = indicator(Array(u), mesh, equations, dg, cache)
    alpha = CuArray{Float64}(alpha)

    # For `Float64`, this gives 1.8189894035458565e-12
    # For `Float32`, this gives 1.1920929f-5
    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl

    element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
    element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))

    pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
                                                                                              element_ids_dgfv,
                                                                                              alpha,
                                                                                              atol)
    pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol;
                                      configurator_1d(pure_blended_element_count_kernel, alpha)...)

    # Zero the diagonal on a private copy. `set_diagonal_to_zero!` works in place, so
    # applying it to `dg.basis.derivative_split` directly would permanently alter the
    # basis and the "original" matrix uploaded further below would already be modified.
    derivative_split_host = copy(dg.basis.derivative_split)
    set_diagonal_to_zero!(derivative_split_host) # temporarily set here, maybe move outside `rhs!`

    derivative_split_offdiag = CuArray{Float64}(derivative_split_host)
    volume_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
                                        size(u, 4))
    volume_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
                                        size(u, 4))
    noncons_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
                                         size(u, 4))
    noncons_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
                                         size(u, 4))

    inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
    # The `fstar*` arrays have one extra entry along their own direction and start from zero
    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4)))
    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4)))
    fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4)))
    fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4)))

    # `size_arr` is only handed to the configurator; its contents are never read here
    size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4))

    volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr1,
                                                                          volume_flux_arr2,
                                                                          noncons_flux_arr1,
                                                                          noncons_flux_arr2,
                                                                          fstar1_L, fstar1_R,
                                                                          fstar2_L, fstar2_R,
                                                                          u, element_ids_dgfv,
                                                                          derivative_split_offdiag,
                                                                          equations,
                                                                          volume_flux_dg,
                                                                          nonconservative_flux_dg,
                                                                          volume_flux_fv,
                                                                          nonconservative_flux_fv)
    volume_flux_dgfv_kernel(volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
                            noncons_flux_arr2, fstar1_L, fstar1_R, fstar2_L, fstar2_R, u,
                            element_ids_dgfv, derivative_split_offdiag, equations, volume_flux_dg,
                            nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv;
                            configurator_2d(volume_flux_dgfv_kernel, size_arr)...)

    derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split`

    size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^2, size(du, 4))

    volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
                                                                              element_ids_dgfv,
                                                                              alpha,
                                                                              derivative_split,
                                                                              volume_flux_arr1,
                                                                              volume_flux_arr2,
                                                                              noncons_flux_arr1,
                                                                              noncons_flux_arr2,
                                                                              equations)
    volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                              volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
                              noncons_flux_arr2, equations;
                              configurator_3d(volume_integral_dg_kernel, size_arr)...)

    size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 4))

    volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L,
                                                                              fstar1_R,
                                                                              fstar2_L, fstar2_R,
                                                                              inverse_weights,
                                                                              element_ids_dgfv,
                                                                              alpha)
    volume_integral_fv_kernel(du, fstar1_L, fstar1_R, fstar2_L, fstar2_R, inverse_weights,
                              element_ids_dgfv, alpha;
                              configurator_2d(volume_integral_fv_kernel, size_arr)...)

    return nothing
end
10551289

0 commit comments

Comments
 (0)