forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmulti_class_accuracy_op.cu
75 lines (65 loc) · 2.25 KB
/
multi_class_accuracy_op.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/multi_class_accuracy_op.h"
#include "caffe2/utils/GpuAtomics.cuh"
#include "caffe2/utils/math.h"
namespace caffe2 {
namespace {
__global__ void MultiClassAccuracyKernel(const int N, const int D, const float* Xdata,
const int* labeldata, float* accuracies, int* amounts) {
CUDA_1D_KERNEL_LOOP(i, N) {
float maxval = Xdata[i * D];
int maxid = 0;
for (int j = 1; j < D; ++j) {
if (Xdata[i * D + j] > maxval) {
maxval = Xdata[i * D + j];
maxid = j;
}
}
int labelid = labeldata[i];
if (maxid == labelid) {
gpu_atomic_add(accuracies + labelid, static_cast<float>(1));
}
gpu_atomic_add(amounts + labelid, static_cast<int>(1));
}
}
__global__ void MultiClassAccuracyDivideKernel(
const int D, float* accuracies, const int* amounts) {
CUDA_1D_KERNEL_LOOP(i, D) {
if (amounts[i]) {
accuracies[i] /= amounts[i];
}
}
}
} // namespace
template <>
bool MultiClassAccuracyOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(PREDICTION);
auto& label = Input(LABEL);
TORCH_DCHECK_EQ(X.dim(), 2);
// amount, number of instances
int N = X.dim32(0);
// dimension, number of classes
int D = X.dim32(1);
TORCH_DCHECK_EQ(label.dim(), 1);
TORCH_DCHECK_EQ(label.dim32(0), N);
auto* Y0 = Output(0, {D}, at::dtype<float>());
auto* Y1 = Output(1, {D}, at::dtype<int>());
const float* Xdata = X.data<float>();
const int* labeldata = label.data<int>();
float* accuracies = Y0->template mutable_data<float>();
int* amounts = Y1->template mutable_data<int>();
math::Set<float, CUDAContext>(D, 0.0, accuracies, &context_);
math::Set<int, CUDAContext>(D, 0, amounts, &context_);
MultiClassAccuracyKernel<<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS,
0, context_.cuda_stream()>>>(
N, D, Xdata, labeldata, accuracies, amounts);
C10_CUDA_KERNEL_LAUNCH_CHECK();
MultiClassAccuracyDivideKernel<<<CAFFE_GET_BLOCKS(D), CAFFE_CUDA_NUM_THREADS,
0, context_.cuda_stream()>>>(
D, accuracies, amounts);
C10_CUDA_KERNEL_LAUNCH_CHECK();
return true;
}
REGISTER_CUDA_OPERATOR(
MultiClassAccuracy, MultiClassAccuracyOp<float, CUDAContext>);
} // namespace caffe2