Skip to content

Commit 2078657

Browse files
committed
proper fix
1 parent 3be97f7 commit 2078657

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

scripts/perftest.jl

+4-4
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,22 @@ using ParallelStencil.FiniteDifferences2D
66
@init_parallel_stencil(CUDA, Float64, 2)
77

88
# Stencil kernel: writes one explicit update of `C` into `C2`.
#
# `C2` receives the result, `C` is the current field, `D` is a per-cell
# coefficient and `dt` scales the increment. `_dx` and `_dy` are each multiplied
# in twice; presumably they are inverse grid spacings (1/dx, 1/dy) -- TODO confirm
# against the caller.
# The `@inn`, `@d2_xi` and `@d2_yi` macros come from
# ParallelStencil.FiniteDifferences2D; presumably they select inner points and
# second differences along x / y -- see that package's documentation.
@parallel function diffusion_step!(C2, C, D, dt, _dx, _dy)
    @inn(C2) = @inn(C) + dt * @inn(D) * (
        @d2_xi(C) * _dx * _dx +
        @d2_yi(C) * _dy * _dy
    )
    return
end
1212

1313
"""
    perftest()

Benchmark `diffusion_step!` on a `512 * 64` by `512 * 64` grid and print the
effective memory throughput `T_eff`.

`T_eff` is computed with a decimal divisor (`1e9`), so it is reported in GB/s
(not GiB/s). The second message relates it to a reference value of `1355`,
presumably the memory-copy throughput of the target GPU in GB/s.

Returns `nothing`; the results are only printed.
"""
function perftest()
    nx = ny = 512 * 64
    C = @rand(nx, ny)
    D = @rand(nx, ny)
    _dx = _dy = dt = rand()
    C2 = copy(C)
    # `$` interpolation hands the local values to the benchmark expression.
    t_it = @belapsed begin
        @parallel diffusion_step!($C2, $C, $D, $dt, $_dx, $_dy)
    end
    # (2 * 1 + 1) = 3 full-array transfers per iteration (presumably two reads
    # and one write -- confirm against the kernel); / 1e9 converts bytes to GB.
    T_eff = (2 * 1 + 1) / 1e9 * nx * ny * sizeof(Float64) / t_it
    println("T_eff = $(T_eff) GB/s using CUDA.jl on a Nvidia Tesla A100 GPU")
    println("So that's cool. We are getting close to hardware limit, running at $(round(T_eff/1355*100, sigdigits=2)) % of memory copy! 🚀")
    return
end
2727

0 commit comments

Comments
 (0)