Skip to content

Commit aad40ec

Browse files
authored
Merge pull request brucefan1983#768 from brucefan1983/hip
From CUDA only to CUDA+HIP
2 parents 49bda9b + 96e2b59 commit aad40ec

File tree

113 files changed

+1002
-570
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+1002
-570
lines changed

src/force/dftd3.cu

+8-7
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ J. Comput. Chem., 32, 1456 (2011).
3232
#include "model/box.cuh"
3333
#include "neighbor.cuh"
3434
#include "utilities/common.cuh"
35+
#include "utilities/gpu_macro.cuh"
3536
#include <algorithm>
3637
#include <cctype>
3738
#include <iostream>
@@ -947,7 +948,7 @@ void DFTD3::compute_small_box(
947948
r12.data() + size_x12 * 3,
948949
r12.data() + size_x12 * 4,
949950
r12.data() + size_x12 * 5);
950-
CUDA_CHECK_KERNEL
951+
GPU_CHECK_KERNEL
951952

952953
find_dftd3_coordination_number_small_box<<<(N - 1) / 64 + 1, 64>>>(
953954
dftd3_para,
@@ -959,7 +960,7 @@ void DFTD3::compute_small_box(
959960
r12.data() + size_x12 * 4,
960961
r12.data() + size_x12 * 5,
961962
cn.data());
962-
CUDA_CHECK_KERNEL
963+
GPU_CHECK_KERNEL
963964

964965
add_dftd3_force_small_box<<<(N - 1) / 64 + 1, 64>>>(
965966
dftd3_para,
@@ -979,7 +980,7 @@ void DFTD3::compute_small_box(
979980
virial_per_atom.data(),
980981
dc6_sum.data(),
981982
dc8_sum.data());
982-
CUDA_CHECK_KERNEL
983+
GPU_CHECK_KERNEL
983984

984985
add_dftd3_force_extra_small_box<<<(N - 1) / 64 + 1, 64>>>(
985986
dftd3_para,
@@ -996,7 +997,7 @@ void DFTD3::compute_small_box(
996997
force_per_atom.data() + N,
997998
force_per_atom.data() + N * 2,
998999
virial_per_atom.data());
999-
CUDA_CHECK_KERNEL
1000+
GPU_CHECK_KERNEL
10001001
}
10011002

10021003
void DFTD3::compute_large_box(
@@ -1058,7 +1059,7 @@ void DFTD3::compute_large_box(
10581059
position_per_atom.data() + N,
10591060
position_per_atom.data() + N * 2,
10601061
cn.data());
1061-
CUDA_CHECK_KERNEL
1062+
GPU_CHECK_KERNEL
10621063

10631064
find_dftd3_force_large_box<<<(N - 1) / 64 + 1, 64>>>(
10641065
dftd3_para,
@@ -1084,7 +1085,7 @@ void DFTD3::compute_large_box(
10841085
virial_per_atom.data(),
10851086
dc6_sum.data(),
10861087
dc8_sum.data());
1087-
CUDA_CHECK_KERNEL
1088+
GPU_CHECK_KERNEL
10881089

10891090
find_dftd3_force_extra_large_box<<<(N - 1) / 64 + 1, 64>>>(
10901091
dftd3_para,
@@ -1107,7 +1108,7 @@ void DFTD3::compute_large_box(
11071108
force_per_atom.data() + N,
11081109
force_per_atom.data() + N * 2,
11091110
virial_per_atom.data());
1110-
CUDA_CHECK_KERNEL
1111+
GPU_CHECK_KERNEL
11111112
}
11121113

11131114
void DFTD3::compute(

src/force/dftd3para.cuh

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
#pragma once
1717

18+
#include "utilities/gpu_macro.cuh"
19+
1820
namespace
1921
{
2022
#define Bohr 0.5291772575069165f

src/force/eam.cu

+5-4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ The EAM potential. Currently two analytical versions:
2222
#include "eam.cuh"
2323
#include "neighbor.cuh"
2424
#include "utilities/error.cuh"
25+
#include "utilities/gpu_macro.cuh"
2526
#include <cstring>
2627
#define BLOCK_SIZE_FORCE 64
2728

@@ -514,7 +515,7 @@ void EAM::compute(
514515
position_per_atom.data() + number_of_atoms * 2,
515516
eam_data.Fp.data(),
516517
potential_per_atom.data());
517-
CUDA_CHECK_KERNEL
518+
GPU_CHECK_KERNEL
518519

519520
find_force_eam_step2<0><<<grid_size, BLOCK_SIZE_FORCE>>>(
520521
eam2004zhou,
@@ -535,7 +536,7 @@ void EAM::compute(
535536
force_per_atom.data() + 2 * number_of_atoms,
536537
virial_per_atom.data(),
537538
potential_per_atom.data());
538-
CUDA_CHECK_KERNEL
539+
GPU_CHECK_KERNEL
539540
}
540541

541542
if (potential_model == 1) {
@@ -554,7 +555,7 @@ void EAM::compute(
554555
position_per_atom.data() + number_of_atoms * 2,
555556
eam_data.Fp.data(),
556557
potential_per_atom.data());
557-
CUDA_CHECK_KERNEL
558+
GPU_CHECK_KERNEL
558559

559560
find_force_eam_step2<1><<<grid_size, BLOCK_SIZE_FORCE>>>(
560561
eam2004zhou,
@@ -575,6 +576,6 @@ void EAM::compute(
575576
force_per_atom.data() + 2 * number_of_atoms,
576577
virial_per_atom.data(),
577578
potential_per_atom.data());
578-
CUDA_CHECK_KERNEL
579+
GPU_CHECK_KERNEL
579580
}
580581
}

src/force/fcp.cu

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ The force constant potential (FCP)
1919

2020
#include "fcp.cuh"
2121
#include "utilities/error.cuh"
22+
#include "utilities/gpu_macro.cuh"
2223
#include <cstring>
2324
#include <vector>
2425

@@ -1038,7 +1039,7 @@ void FCP::compute(
10381039
position_per_atom.data() + number_of_atoms * 2,
10391040
fcp_data.r0.data(),
10401041
fcp_data.u.data());
1041-
CUDA_CHECK_KERNEL
1042+
GPU_CHECK_KERNEL
10421043

10431044
fcp_data.pfv.fill(0.0f);
10441045

@@ -1125,5 +1126,5 @@ void FCP::compute(
11251126
force_per_atom.data() + 2 * number_of_atoms,
11261127
virial_per_atom.data());
11271128

1128-
CUDA_CHECK_KERNEL
1129+
GPU_CHECK_KERNEL
11291130
}

src/force/force.cu

+19-18
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ The driver class calculating force and related quantities.
2828
#include "ilp_tmd_sw.cuh"
2929
#include "utilities/common.cuh"
3030
#include "utilities/error.cuh"
31+
#include "utilities/gpu_macro.cuh"
3132
#include "utilities/read_file.cuh"
3233
#include <cstring>
3334
#include <iostream>
@@ -106,7 +107,7 @@ void Force::parse_potential(
106107
strcmp(potential_name, "nep4_temperature") == 0 ||
107108
strcmp(potential_name, "nep4_zbl_temperature") == 0) {
108109
int num_gpus;
109-
CHECK(cudaGetDeviceCount(&num_gpus));
110+
CHECK(gpuGetDeviceCount(&num_gpus));
110111
#ifdef ZHEYONG
111112
num_gpus = 3;
112113
#endif
@@ -226,7 +227,7 @@ static __global__ void gpu_sum_force(int N, double* g_fx, double* g_fy, double*
226227
s_f[tid] = f;
227228
__syncthreads();
228229

229-
#pragma unroll
230+
230231
for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
231232
if (tid < offset) {
232233
s_f[tid] += s_f[tid + offset];
@@ -466,7 +467,7 @@ void Force::compute(
466467
force_per_atom.data() + number_of_atoms * 2,
467468
potential_per_atom.data(),
468469
virial_per_atom.data());
469-
CUDA_CHECK_KERNEL
470+
GPU_CHECK_KERNEL
470471

471472
if (multiple_potentials_mode_.compare("observe") == 0) {
472473
// If observing, calculate using main potential only
@@ -516,7 +517,7 @@ void Force::compute(
516517
force_per_atom.data(),
517518
virial_per_atom.data(),
518519
(double)potentials.size());
519-
CUDA_CHECK_KERNEL
520+
GPU_CHECK_KERNEL
520521
} else {
521522
PRINT_INPUT_ERROR("Invalid mode for multiple potentials.\n");
522523
}
@@ -552,7 +553,7 @@ void Force::compute(
552553
force_per_atom.data() + number_of_atoms,
553554
force_per_atom.data() + 2 * number_of_atoms,
554555
ftot.data());
555-
CUDA_CHECK_KERNEL
556+
GPU_CHECK_KERNEL
556557

557558
gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
558559
number_of_atoms,
@@ -561,7 +562,7 @@ void Force::compute(
561562
force_per_atom.data() + number_of_atoms,
562563
force_per_atom.data() + 2 * number_of_atoms,
563564
ftot.data());
564-
CUDA_CHECK_KERNEL
565+
GPU_CHECK_KERNEL
565566
}
566567

567568
// always correct the force when using the FCP potential
@@ -574,7 +575,7 @@ void Force::compute(
574575
force_per_atom.data() + number_of_atoms,
575576
force_per_atom.data() + 2 * number_of_atoms,
576577
ftot.data());
577-
CUDA_CHECK_KERNEL
578+
GPU_CHECK_KERNEL
578579

579580
gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
580581
number_of_atoms,
@@ -583,7 +584,7 @@ void Force::compute(
583584
force_per_atom.data() + number_of_atoms,
584585
force_per_atom.data() + 2 * number_of_atoms,
585586
ftot.data());
586-
CUDA_CHECK_KERNEL
587+
GPU_CHECK_KERNEL
587588
}
588589
}
589590
}
@@ -647,7 +648,7 @@ static __global__ void gpu_sum_tensor(int N, double* g_tensor, double* g_sum_ten
647648
s_t[tid] = t;
648649
__syncthreads();
649650

650-
#pragma unroll
651+
651652
for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
652653
if (tid < offset) {
653654
s_t[tid] += s_t[tid + offset];
@@ -754,7 +755,7 @@ void Force::compute(
754755
force_per_atom.data() + number_of_atoms * 2,
755756
potential_per_atom.data(),
756757
virial_per_atom.data());
757-
CUDA_CHECK_KERNEL
758+
GPU_CHECK_KERNEL
758759

759760
temperature += delta_T;
760761
if (multiple_potentials_mode_.compare("observe") == 0) {
@@ -805,7 +806,7 @@ void Force::compute(
805806
force_per_atom.data(),
806807
virial_per_atom.data(),
807808
(double)potentials.size());
808-
CUDA_CHECK_KERNEL
809+
GPU_CHECK_KERNEL
809810
} else {
810811
PRINT_INPUT_ERROR("Invalid mode for multiple potentials.\n");
811812
}
@@ -841,7 +842,7 @@ void Force::compute(
841842
force_per_atom.data() + number_of_atoms,
842843
force_per_atom.data() + 2 * number_of_atoms,
843844
ftot.data());
844-
CUDA_CHECK_KERNEL
845+
GPU_CHECK_KERNEL
845846

846847
gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
847848
number_of_atoms,
@@ -850,7 +851,7 @@ void Force::compute(
850851
force_per_atom.data() + number_of_atoms,
851852
force_per_atom.data() + 2 * number_of_atoms,
852853
ftot.data());
853-
CUDA_CHECK_KERNEL
854+
GPU_CHECK_KERNEL
854855
} else if (compute_hnemdec_ == 0) {
855856
// the tensor:
856857
// xx xy xz 0 3 4
@@ -876,10 +877,10 @@ void Force::compute(
876877
virial_per_atom.data() + 8 * number_of_atoms,
877878
virial_per_atom.data() + 2 * number_of_atoms,
878879
tensor_per_atom.data());
879-
CUDA_CHECK_KERNEL
880+
GPU_CHECK_KERNEL
880881

881882
gpu_sum_tensor<<<9, 1024>>>(number_of_atoms, tensor_per_atom.data(), tensor_tot.data());
882-
CUDA_CHECK_KERNEL
883+
GPU_CHECK_KERNEL
883884

884885
gpu_add_driving_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
885886
number_of_atoms,
@@ -901,7 +902,7 @@ void Force::compute(
901902
force_per_atom.data(),
902903
force_per_atom.data() + number_of_atoms,
903904
force_per_atom.data() + 2 * number_of_atoms);
904-
CUDA_CHECK_KERNEL
905+
GPU_CHECK_KERNEL
905906

906907
} else if (compute_hnemdec_ != -1) {
907908
gpu_add_driving_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
@@ -926,7 +927,7 @@ void Force::compute(
926927
force_per_atom.data() + number_of_atoms,
927928
force_per_atom.data() + 2 * number_of_atoms,
928929
ftot.data());
929-
CUDA_CHECK_KERNEL
930+
GPU_CHECK_KERNEL
930931

931932
gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
932933
number_of_atoms,
@@ -935,7 +936,7 @@ void Force::compute(
935936
force_per_atom.data() + number_of_atoms,
936937
force_per_atom.data() + 2 * number_of_atoms,
937938
ftot.data());
938-
CUDA_CHECK_KERNEL
939+
GPU_CHECK_KERNEL
939940
}
940941
}
941942
}

src/force/force_constant.cu

+7-6
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Use finite difference to calculate the seconod order force constants:
2323
#include "model/box.cuh"
2424
#include "model/group.cuh"
2525
#include "utilities/error.cuh"
26+
#include "utilities/gpu_macro.cuh"
2627
#include <vector>
2728

2829
static __global__ void gpu_shift_atom(const double dx, double* x) { x[0] += dx; }
@@ -34,13 +35,13 @@ static void shift_atom(
3435

3536
if (beta == 0) {
3637
gpu_shift_atom<<<1, 1>>>(dx, position_per_atom.data() + n2);
37-
CUDA_CHECK_KERNEL
38+
GPU_CHECK_KERNEL
3839
} else if (beta == 1) {
3940
gpu_shift_atom<<<1, 1>>>(dx, position_per_atom.data() + number_of_atoms + n2);
40-
CUDA_CHECK_KERNEL
41+
GPU_CHECK_KERNEL
4142
} else {
4243
gpu_shift_atom<<<1, 1>>>(dx, position_per_atom.data() + number_of_atoms * 2 + n2);
43-
CUDA_CHECK_KERNEL
44+
GPU_CHECK_KERNEL
4445
}
4546
}
4647

@@ -67,10 +68,10 @@ static void get_f(
6768
box, position_per_atom, type, group, potential_per_atom, force_per_atom, virial_per_atom);
6869

6970
size_t M = sizeof(double);
70-
CHECK(cudaMemcpy(f + 0, force_per_atom.data() + n1, M, cudaMemcpyDeviceToHost));
71-
CHECK(cudaMemcpy(f + 1, force_per_atom.data() + n1 + number_of_atoms, M, cudaMemcpyDeviceToHost));
71+
CHECK(gpuMemcpy(f + 0, force_per_atom.data() + n1, M, gpuMemcpyDeviceToHost));
72+
CHECK(gpuMemcpy(f + 1, force_per_atom.data() + n1 + number_of_atoms, M, gpuMemcpyDeviceToHost));
7273
CHECK(
73-
cudaMemcpy(f + 2, force_per_atom.data() + n1 + number_of_atoms * 2, M, cudaMemcpyDeviceToHost));
74+
gpuMemcpy(f + 2, force_per_atom.data() + n1 + number_of_atoms * 2, M, gpuMemcpyDeviceToHost));
7475

7576
shift_atom(-dx, n2, beta, position_per_atom);
7677
}

0 commit comments

Comments
 (0)