@@ -281,6 +281,148 @@ function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha,
281
281
return nothing
282
282
end
283
283
284
# Kernel for calculating pure DG and DG-FV volume fluxes.
#
# Thread layout: the x-index `j` linearizes a node triple `(j1, j2, j3)` (each in
# `1:size(u, 2)`), the y-index `k` runs over the last dimension of `u`.
#
# Every in-range thread
#   1. evaluates `volume_flux_dg` and `nonconservative_flux_dg` for the node pairs
#      `(j1, j2) <-> (j3, j2)` (direction 1) and `(j1, j2) <-> (j1, j3)` (direction 2) and
#      stores them in `volume_flux_arr1/2` (scaled by `derivative_split`) and
#      `noncons_flux_arr1/2` (unscaled);
#   2. if `j3 == 1` and `element_ids_dgfv[k] != 0`, additionally evaluates `volume_flux_fv` and
#      `nonconservative_flux_fv` between neighboring nodes and stores
#      `flux + 0.5 * nonconservative part` in `fstar1_L/R` (for `j1 != 1`) and
#      `fstar2_L/R` (for `j2 != 1`).
#
# Assumes `length(element_ids_dgfv) == size(u, 4)` (as noted by the original author).
function volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
                                  noncons_flux_arr2, fstar1_L, fstar1_R, fstar2_L, fstar2_R,
                                  u, element_ids_dgfv, derivative_split,
                                  equations::AbstractEquations{2},
                                  volume_flux_dg::Any, nonconservative_flux_dg::Any,
                                  volume_flux_fv::Any, nonconservative_flux_fv::Any)
    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

    nnodes = size(u, 2)

    # Threads that fall outside the index space have nothing to do
    (j <= nnodes^3 && k <= size(u, 4)) || return nothing

    # Split the linear index `j` into the node triple `(j1, j2, j3)`
    rest = rem(j - 1, nnodes^2)
    j1 = div(j - 1, nnodes^2) + 1
    j2 = div(rest, nnodes) + 1
    j3 = rem(rest, nnodes) + 1

    # Zero means "no DG-FV work for slot `k`" (checked before the FV part below)
    element_dgfv = element_ids_dgfv[k]

    # The three `get_node_vars` loads below might be merged into a single set of
    # operations for better performance (to be explored).
    u_center = get_node_vars(u, equations, j1, j2, k)
    u_dir1 = get_node_vars(u, equations, j3, j2, k)
    u_dir2 = get_node_vars(u, equations, j1, j3, k)

    flux_dg1 = volume_flux_dg(u_center, u_dir1, 1, equations)
    flux_dg2 = volume_flux_dg(u_center, u_dir2, 2, equations)

    noncons_dg1 = nonconservative_flux_dg(u_center, u_dir1, 1, equations)
    noncons_dg2 = nonconservative_flux_dg(u_center, u_dir2, 2, equations)

    @inbounds for v in axes(u, 1)
        volume_flux_arr1[v, j1, j3, j2, k] = derivative_split[j1, j3] * flux_dg1[v]
        volume_flux_arr2[v, j1, j2, j3, k] = derivative_split[j2, j3] * flux_dg2[v]
        noncons_flux_arr1[v, j1, j3, j2, k] = noncons_dg1[v]
        noncons_flux_arr2[v, j1, j2, j3, k] = noncons_dg2[v]
    end

    # FV part: only the `j3 == 1` thread of each `(j1, j2)` pair takes part, and only when
    # `element_dgfv` is nonzero. NOTE: data-dependent branching, marked `bad` by the
    # original author (presumably because of thread divergence).
    (j3 == 1 && element_dgfv != 0) || return nothing

    if j1 != 1
        # Flux between node `j1 - 1` and node `j1` in direction 1
        u_left = get_node_vars(u, equations, j1 - 1, j2, element_dgfv)
        u_right = get_node_vars(u, equations, j1, j2, element_dgfv)

        flux_fv1 = volume_flux_fv(u_left, u_right, 1, equations)

        noncons_fv1_L = nonconservative_flux_fv(u_left, u_right, 1, equations)
        noncons_fv1_R = nonconservative_flux_fv(u_right, u_left, 1, equations)

        @inbounds for v in axes(u, 1)
            fstar1_L[v, j1, j2, element_dgfv] = flux_fv1[v] + 0.5 * noncons_fv1_L[v]
            fstar1_R[v, j1, j2, element_dgfv] = flux_fv1[v] + 0.5 * noncons_fv1_R[v]
        end
    end

    if j2 != 1
        # Flux between node `j2 - 1` and node `j2` in direction 2
        u_left = get_node_vars(u, equations, j1, j2 - 1, element_dgfv)
        u_right = get_node_vars(u, equations, j1, j2, element_dgfv)

        flux_fv2 = volume_flux_fv(u_left, u_right, 2, equations)

        noncons_fv2_L = nonconservative_flux_fv(u_left, u_right, 2, equations)
        noncons_fv2_R = nonconservative_flux_fv(u_right, u_left, 2, equations)

        @inbounds for v in axes(u, 1)
            fstar2_L[v, j1, j2, element_dgfv] = flux_fv2[v] + 0.5 * noncons_fv2_L[v]
            fstar2_R[v, j1, j2, element_dgfv] = flux_fv2[v] + 0.5 * noncons_fv2_R[v]
        end
    end

    return nothing
end
365
+
366
# Kernel for calculating DG volume integral contribution.
#
# Thread layout: x-index `i` is the variable index (`1:size(du, 1)`), y-index `j` linearizes
# the node pair `(j1, j2)` (each in `1:size(du, 2)`), z-index `k` runs over `1:size(du, 4)`.
#
# For slot `k`, the thread accumulates into `du[i, j1, j2, element]`:
#   - if `element_ids_dg[k] != 0`: the sums of `volume_flux_arr1/2` over the third node index
#     plus `0.5 *` the `derivative_split`-weighted sums of `noncons_flux_arr1/2`;
#   - if `element_ids_dgfv[k] != 0`: the same terms, each scaled by `1 - alpha[k]`.
#
# Assumes `length(element_ids_dg) == length(element_ids_dgfv) == size(du, 4)`
# (as noted by the original author).
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                                    volume_flux_arr1, volume_flux_arr2,
                                    noncons_flux_arr1, noncons_flux_arr2,
                                    equations::AbstractEquations{2})
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

    nnodes = size(du, 2)

    # Threads that fall outside the index space have nothing to do
    (i <= size(du, 1) && j <= nnodes^2 && k <= size(du, 4)) || return nothing

    # Split the linear index `j` into the node pair `(j1, j2)`
    j1 = div(j - 1, nnodes) + 1
    j2 = rem(j - 1, nnodes) + 1

    # A zero id means the corresponding branch below is skipped for slot `k`
    element_dg = element_ids_dg[k]
    element_dgfv = element_ids_dgfv[k]
    alpha_element = alpha[k]

    # NOTE: both branches are data dependent and were marked `bad` by the original author
    # (presumably because of thread divergence).

    # Pure DG contribution
    @inbounds if element_dg != 0
        noncons_sum = 0.0

        for l in axes(du, 2)
            du[i, j1, j2, element_dg] += volume_flux_arr1[i, j1, l, j2, element_dg]
            du[i, j1, j2, element_dg] += volume_flux_arr2[i, j1, j2, l, element_dg]

            noncons_sum += derivative_split[j1, l] *
                           noncons_flux_arr1[i, j1, l, j2, element_dg]
            noncons_sum += derivative_split[j2, l] *
                           noncons_flux_arr2[i, j1, j2, l, element_dg]
        end

        du[i, j1, j2, element_dg] += 0.5 * noncons_sum
    end

    # DG part of the blended DG-FV contribution, weighted by `1 - alpha`
    @inbounds if element_dgfv != 0
        noncons_sum = 0.0

        for l in axes(du, 2)
            du[i, j1, j2, element_dgfv] += (1 - alpha_element) *
                                           volume_flux_arr1[i, j1, l, j2, element_dgfv]
            du[i, j1, j2, element_dgfv] += (1 - alpha_element) *
                                           volume_flux_arr2[i, j1, j2, l, element_dgfv]

            noncons_sum += derivative_split[j1, l] *
                           noncons_flux_arr1[i, j1, l, j2, element_dgfv]
            noncons_sum += derivative_split[j2, l] *
                           noncons_flux_arr2[i, j1, j2, l, element_dgfv]
        end

        du[i, j1, j2, element_dgfv] += 0.5 * (1 - alpha_element) * noncons_sum
    end

    return nothing
end
425
+
284
426
# Kernel for calculating FV volume integral contribution
285
427
function volume_integral_fv_kernel! (du, fstar1_L, fstar1_R, fstar2_L, fstar2_R,
286
428
inverse_weights, element_ids_dgfv, alpha)
@@ -1050,6 +1192,98 @@ end
1050
1192
# Pack kernels for calculating volume integrals
# (2D `TreeMesh`, nonconservative terms, `VolumeIntegralShockCapturingHG`).
#
# Steps performed here:
#   1. Evaluate the shock indicator on a host copy of `u` and upload the resulting `alpha`.
#   2. Launch `pure_blended_element_count_kernel!` to fill `element_ids_dg` and
#      `element_ids_dgfv` (both start zeroed; presumably a nonzero entry marks an element as
#      pure DG or blended DG-FV, respectively -- confirm against that kernel).
#   3. Launch `volume_flux_dgfv_kernel!` with a copy of `derivative_split` whose diagonal has
#      been zeroed.
#   4. Launch `volume_integral_dg_kernel!` with the unmodified `derivative_split`.
#   5. Launch `volume_integral_fv_kernel!` on the FV interface fluxes `fstar*`.
#
# `du` is updated in place; the function returns `nothing`.
function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::True, equations,
                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
    volume_flux_dg, nonconservative_flux_dg = dg.volume_integral.volume_flux_dg
    volume_flux_fv, nonconservative_flux_fv = dg.volume_integral.volume_flux_fv
    indicator = dg.volume_integral.indicator

    # TODO: Get copies of `u` and `du` on both device and host
    alpha = indicator(Array(u), mesh, equations, dg, cache)
    alpha = CuArray{Float64}(alpha)

    # For `Float64`, this gives 1.8189894035458565e-12
    # For `Float32`, this gives 1.1920929f-5
    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl

    # Zero-initialized id buffers; `fill!` avoids the second device allocation that
    # `zero(CuArray{...}(undef, ...))` would make.
    element_ids_dg = fill!(CuArray{Int64}(undef, length(alpha)), 0)
    element_ids_dgfv = fill!(CuArray{Int64}(undef, length(alpha)), 0)

    pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
                                                                                              element_ids_dgfv,
                                                                                              alpha,
                                                                                              atol)
    pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol;
                                      configurator_1d(pure_blended_element_count_kernel, alpha)...)

    # Zero the diagonal on a *copy*: plain assignment would only alias
    # `dg.basis.derivative_split`, so `set_diagonal_to_zero!` would permanently modify the basis
    # and the "original" matrix uploaded further below would no longer be the original.
    derivative_split = copy(dg.basis.derivative_split)
    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`

    derivative_split = CuArray{Float64}(derivative_split)

    # Flux buffers indexed as (variable, node, node, node, element); every entry is written by
    # `volume_flux_dgfv_kernel!`, so they are left uninitialized here.
    flux_dims = (size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 4))
    volume_flux_arr1 = CuArray{Float64}(undef, flux_dims...)
    volume_flux_arr2 = CuArray{Float64}(undef, flux_dims...)
    noncons_flux_arr1 = CuArray{Float64}(undef, flux_dims...)
    noncons_flux_arr2 = CuArray{Float64}(undef, flux_dims...)

    inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)

    # FV interface fluxes have one extra slot along their own direction. They start zeroed
    # because `volume_flux_dgfv_kernel!` does not write every entry.
    fstar1_dims = (size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))
    fstar2_dims = (size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))
    fstar1_L = fill!(CuArray{Float64}(undef, fstar1_dims...), 0)
    fstar1_R = fill!(CuArray{Float64}(undef, fstar1_dims...), 0)
    fstar2_L = fill!(CuArray{Float64}(undef, fstar2_dims...), 0)
    fstar2_R = fill!(CuArray{Float64}(undef, fstar2_dims...), 0)

    # `size_arr` is a dummy array handed to the launch configurators
    # (presumably only its shape is used -- confirm against `configurator_*d`).
    size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4))

    volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr1,
                                                                          volume_flux_arr2,
                                                                          noncons_flux_arr1,
                                                                          noncons_flux_arr2,
                                                                          fstar1_L, fstar1_R,
                                                                          fstar2_L, fstar2_R,
                                                                          u, element_ids_dgfv,
                                                                          derivative_split,
                                                                          equations,
                                                                          volume_flux_dg,
                                                                          nonconservative_flux_dg,
                                                                          volume_flux_fv,
                                                                          nonconservative_flux_fv)
    volume_flux_dgfv_kernel(volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
                            noncons_flux_arr2, fstar1_L, fstar1_R, fstar2_L, fstar2_R, u,
                            element_ids_dgfv, derivative_split, equations, volume_flux_dg,
                            nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv;
                            configurator_2d(volume_flux_dgfv_kernel, size_arr)...)

    derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split`

    size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^2, size(du, 4))

    volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
                                                                              element_ids_dgfv,
                                                                              alpha,
                                                                              derivative_split,
                                                                              volume_flux_arr1,
                                                                              volume_flux_arr2,
                                                                              noncons_flux_arr1,
                                                                              noncons_flux_arr2,
                                                                              equations)
    volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                              volume_flux_arr1, volume_flux_arr2, noncons_flux_arr1,
                              noncons_flux_arr2, equations;
                              configurator_3d(volume_integral_dg_kernel, size_arr)...)

    size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 4))

    volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L,
                                                                              fstar1_R,
                                                                              fstar2_L, fstar2_R,
                                                                              inverse_weights,
                                                                              element_ids_dgfv,
                                                                              alpha)
    volume_integral_fv_kernel(du, fstar1_L, fstar1_R, fstar2_L, fstar2_R, inverse_weights,
                              element_ids_dgfv, alpha;
                              configurator_2d(volume_integral_fv_kernel, size_arr)...)

    return nothing
end
1055
1289
0 commit comments