From b00600efbd9ed5354f018ae352c68f02101a3547 Mon Sep 17 00:00:00 2001 From: Xiangyu Hu Date: Mon, 12 Aug 2024 20:41:59 +0000 Subject: [PATCH] bug in reduction: buffer update only after goes out of scope --- .../update_cell_linked_list_sycl.hpp | 2 +- .../shared/particle_dynamics/execution_sycl.h | 2 +- .../particle_iterators_sycl.h | 28 +++++++++---------- .../test_2d_dambreak_sycl/dambreak_sycl.cpp | 4 +-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/src_sycl/shared/particle_dynamics/configuration_dynamics/update_cell_linked_list_sycl.hpp b/src/src_sycl/shared/particle_dynamics/configuration_dynamics/update_cell_linked_list_sycl.hpp index cfbed25236..77ea3cfa4a 100644 --- a/src/src_sycl/shared/particle_dynamics/configuration_dynamics/update_cell_linked_list_sycl.hpp +++ b/src/src_sycl/shared/particle_dynamics/configuration_dynamics/update_cell_linked_list_sycl.hpp @@ -11,7 +11,7 @@ void UpdateCellLinkedList:: clearParticleOffsetList(const ParallelDevicePolicy &par_device) { UnsignedInt total_real_particles = particles_->TotalRealParticles(); - copyDataToDevice(total_real_particles, particle_offset_list_ + number_of_cells_, 1); + copyToDevice(total_real_particles, particle_offset_list_ + number_of_cells_, 1); execution_instance.getQueue() .submit( diff --git a/src/src_sycl/shared/particle_dynamics/execution_sycl.h b/src/src_sycl/shared/particle_dynamics/execution_sycl.h index 6a618d930c..fd32aa77f4 100644 --- a/src/src_sycl/shared/particle_dynamics/execution_sycl.h +++ b/src/src_sycl/shared/particle_dynamics/execution_sycl.h @@ -116,7 +116,7 @@ inline void copyToDevice(const T *host, T *device, std::size_t size) } template -inline void copyDataToDevice(const T &value, T *device, std::size_t size) +inline void copyToDevice(const T &value, T *device, std::size_t size) { execution::execution_instance.getQueue().fill(device, value, size); } diff --git a/src/src_sycl/shared/particle_dynamics/particle_iterators_sycl.h b/src/src_sycl/shared/particle_dynamics/particle_iterators_sycl.h index eea2a85162..c3e3d5ffef 100644 --- a/src/src_sycl/shared/particle_dynamics/particle_iterators_sycl.h +++ b/src/src_sycl/shared/particle_dynamics/particle_iterators_sycl.h @@ -50,24 +50,24 @@ inline void particle_for(const ParallelDevicePolicy &par_device, template inline ReturnType particle_reduce(const ParallelDevicePolicy &par_device, - const IndexRange &particles_range, const ReturnType &reference, Operation &&operation, + const IndexRange &particles_range, ReturnType temp, Operation &&operation, const LocalDynamicsFunction &local_dynamics_function) { - ReturnType result = reference; auto &sycl_queue = execution_instance.getQueue(); - sycl::buffer buffer_result(&result, 1); const size_t particles_size = particles_range.size(); - sycl_queue.submit([&](sycl::handler &cgh) - { auto reduction_operator = - sycl::reduction(buffer_result, cgh, operation); - cgh.parallel_for(execution_instance.getUniformNdRange(particles_size), reduction_operator, - [=](sycl::nd_item<1> item, auto& reduction) - { - if(item.get_global_id() < particles_size) - reduction.combine(local_dynamics_function(item.get_global_id(0))); - }); }) - .wait_and_throw(); - return result; + { + sycl::buffer buffer_result(&temp, 1); + sycl_queue.submit([&](sycl::handler &cgh) + { + auto reduction_operator = sycl::reduction(buffer_result, cgh, operation); + cgh.parallel_for(execution_instance.getUniformNdRange(particles_size), reduction_operator, + [=](sycl::nd_item<1> item, auto& reduction) { + if(item.get_global_id() < particles_size) + reduction.combine(local_dynamics_function(item.get_global_id(0))); + }); }) + .wait_and_throw(); + } // buffer_result goes out of scope, so the result (of temp) is updated + return temp; } } // namespace SPH #endif // PARTICLE_ITERATORS_SYCL_H diff --git a/tests/tests_sycl/2d_examples/test_2d_dambreak_sycl/dambreak_sycl.cpp b/tests/tests_sycl/2d_examples/test_2d_dambreak_sycl/dambreak_sycl.cpp index c49b95e93b..742e14e887 100644 --- a/tests/tests_sycl/2d_examples/test_2d_dambreak_sycl/dambreak_sycl.cpp +++ b/tests/tests_sycl/2d_examples/test_2d_dambreak_sycl/dambreak_sycl.cpp @@ -185,8 +185,8 @@ int main(int ac, char *av[]) if (number_of_iterations % restart_output_interval == 0) { dv_force_prior->synchronizeToDevice(); - dv_force->synchronizeWithDevice(); - dv_velocity->synchronizeWithDevice(); + dv_force->synchronizeToDevice(); + dv_velocity->synchronizeToDevice(); Real ck_advection_dt = ck_fluid_advection_time_step.exec(); std::cout << std::fixed << std::setprecision(9) << "N=" << number_of_iterations << " advection_dt = " << advection_dt << " ck_acoustic_dt = " << ck_advection_dt << "\n";