Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions gtests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
add_executable(artss_gtest ${ARTSS_SOURCE_FILES})
add_executable(artss_gtest_gpu ${ARTSS_SOURCE_FILES})

target_sources(artss_gtest
PRIVATE
Expand All @@ -11,9 +12,17 @@ add_subdirectory(GPULists)
add_subdirectory(randomField)
add_subdirectory(utility)

target_compile_options(artss_gtest_gpu PUBLIC -acc -Minfo=accel -ta=${GPU_CC_TA},lineinfo,${CUDA_VERSION_TA})
target_link_options(artss_gtest_gpu PUBLIC -acc -ta=${GPU_CC_TA},lineinfo,${CUDA_VERSION_TA})

target_include_directories(artss_gtest PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(artss_gtest gtest gtest_main)
target_link_libraries(artss_gtest spdlog::spdlog)
target_link_libraries(artss_gtest fmt::fmt)

target_include_directories(artss_gtest_gpu PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(artss_gtest_gpu gtest gtest_main)
target_link_libraries(artss_gtest_gpu spdlog::spdlog)
target_link_libraries(artss_gtest_gpu fmt::fmt)

add_test(NAME artss_gtest COMMAND artss_gtest)
5 changes: 5 additions & 0 deletions gtests/field/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@ target_sources(artss_gtest
PRIVATE
${CMAKE_CURRENT_LIST_DIR}/Field.cpp
)

target_sources(artss_gtest_gpu
PRIVATE
${CMAKE_CURRENT_LIST_DIR}/Field.cpp
)
33 changes: 32 additions & 1 deletion gtests/field/Field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,20 @@ TEST_F(FieldTest, copy_data) {
}
}

/**
 * Field::copy_data has to leave the address stored in `data` untouched:
 * the pointer observed before the call must be the very same pointer
 * afterwards (important for GPU use).
 */
TEST_F(FieldTest, copy_data_but_same_pointer) {
    const size_t size = 100;
    Field a(UNKNOWN_FIELD, 0.0, 0, size);
    Field b(UNKNOWN_FIELD, 0.5, 0, size);

    // remember where a's data array lives before copying b into it
    real *const pointer_before_copy = a.data;
    a.copy_data(b);

    ASSERT_EQ(pointer_before_copy, a.data);
}


TEST_F(FieldTest, stress_copy_data) {
size_t size = 100000;
Field a(UNKNOWN_FIELD, 0.0, 0, size);
Expand Down Expand Up @@ -132,7 +146,10 @@ TEST_F(FieldTest, add_two_fields) {
x += 1.0;
}

a.update_dev();
b.update_dev();
a += b;
a.update_host();

x = 0.0;
for (auto i = 0; i < size; ++i) {
Expand All @@ -152,9 +169,12 @@ TEST_F(FieldTest, stress_add_two_fields) {
x += 1.0;
}

a.update_dev();
b.update_dev();
for (int i = 0; i <= 100000; ++i) {
a += b;
}
a.update_host();

x = 0.0;
for (auto i = 0; i < size; ++i) {
Expand All @@ -175,7 +195,10 @@ TEST_F(FieldTest, mul_two_fields) {
x += 1.0;
}

a.update_dev();
b.update_dev();
a *= b;
a.update_host();

x = 0.0;
for (auto i = 0; i < size; ++i) {
Expand All @@ -195,9 +218,12 @@ TEST_F(FieldTest, stress_mul_two_fields) {
x += 1.0;
}

a.update_dev();
b.update_dev();
for (int i = 0; i < 100000; ++i) {
a *= b;
}
a.update_host();

x = 0.0;
for (auto i = 0; i < size; ++i) {
Expand All @@ -215,8 +241,9 @@ TEST_F(FieldTest, add_scalar) {
a[i] = x;
x += 1.0;
}

a.update_dev();
a += 0.5;
a.update_host();

x = 0.0;
for (auto i = 0; i < size; ++i) {
Expand All @@ -235,9 +262,11 @@ TEST_F(FieldTest, stress_add_scalar) {
x += 1.0;
}

a.update_dev();
for (int i = 0; i < 100000; ++i) {
a += 0.5;
}
a.update_host();

x = 0.0;
for (auto i = 0; i < size; ++i) {
Expand All @@ -256,7 +285,9 @@ TEST_F(FieldTest, mul_scalar) {
x += 1.0;
}

a.update_dev();
a *= 0.5;
a.update_host();

x = 0.0;
for (auto i = 0; i < size; ++i) {
Expand Down
3 changes: 2 additions & 1 deletion src/field/Field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ Field::Field(const Field &original):
Mapping::get_field_type_name(m_type), m_level,
static_cast<void *>(this), static_cast<void *>(data));
#endif
#pragma acc enter data copyin(this[:1]) create(data[:m_size])
#pragma acc enter data copyin(this)
#pragma acc enter data create(data[:m_size])
this->copy_data(original);
}

Expand Down
18 changes: 7 additions & 11 deletions src/field/Field.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,17 @@ class Field {

// acc functions
/// \brief update data array on CPU
void update_host() {
void update_host() const {
#pragma acc update host(data[:m_size])
}

/// \brief update data array on GPU
void update_dev() {
void update_dev() const {
#pragma acc update device(data[:m_size])
}

/// \brief copy data array to GPU
void copyin() {
void copyin() const {
#pragma acc enter data copyin(data[:m_size])
#ifndef BENCHMARKING
m_logger->debug("{} level {} copyin with data pointer: {}", Mapping::get_field_type_name(m_type), m_level, static_cast<void *>(data));
Expand Down Expand Up @@ -97,40 +97,36 @@ class Field {
static void swap(Field &a, Field &b) { std::swap(a.data, b.data); }

Field &operator+=(const real x) {
#pragma acc parallel loop independent present(this->data[:m_size]) async
#pragma acc parallel loop
for (size_t i = 0; i < m_size; ++i) {
this->data[i] += x;
}
#pragma acc wait
return *this;
}

Field &operator+=(const Field &rhs) {
auto rhs_data = rhs.data;
#pragma acc parallel loop independent present(this->data[:m_size], rhs_data[:m_size]) async
#pragma acc parallel loop present(rhs_data[:m_size])
for (size_t i = 0; i < m_size; ++i) {
this->data[i] += rhs_data[i];
}
#pragma acc wait
return *this;
}

Field &operator*=(const real x) {
#pragma acc parallel loop independent present(this->data[:m_size]) async
#pragma acc parallel loop
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we need a `present` clause here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same applies to lines 90 and 73.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`independent` is also important: it tells the compiler that the loop iterations can be freely parallelised.

for (size_t i = 0; i < m_size; ++i) {
this->data[i] *= x;
}
#pragma acc wait
return *this;
}

Field &operator*=(const Field &rhs) {
auto rhs_data = rhs.data;
#pragma acc parallel loop independent present(this->data[:m_size], rhs_data[:m_size]) async
#pragma acc parallel loop present(rhs_data[:m_size])
for (size_t i = 0; i < m_size; ++i) {
this->data[i] *= rhs_data[i];
}
#pragma acc wait
return *this;
}

Expand Down