#include "caffe2/operators/flexible_top_k.h"
#include "caffe2/proto/caffe2_pb.h"
namespace caffe2 {
namespace {
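// Comparator that makes std::priority_queue act as a min-heap on value:
// top() is the smallest retained (value, index) pair, and ties on value are
// ordered so that the pair with the larger index is popped first.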
template <typename T>
struct ValueCmp {
  bool operator()(
      const std::pair<T, int64_t>& lhs,
      const std::pair<T, int64_t>& rhs) {
    return (
        lhs.first > rhs.first ||
        (lhs.first == rhs.first && lhs.second < rhs.second));
  }
};

} // namespace

template <typename T, class Context>
bool FlexibleTopKOp<T, Context>::RunOnDevice() {
  auto& input = Input(0);
  auto& k = Input(1);

  const T* input_data = input.template data<T>();
  const int64_t* k_data = k.template data<int64_t>();

  // get the flattened shape of the input
  CAFFE_ENFORCE_GT(input.dim(), 0);
  vector<int64_t> input_dims = input.sizes().vec();
  vector<int64_t> linear_shape = {
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
      size_to_dim_(input_dims.size() - 1, input_dims), input_dims.back()};
  CAFFE_ENFORCE_EQ(
      linear_shape[0],
      k.numel(),
      "first n-1 dims of input data and K do not match.");

  // the output size is the sum of the per-slice k values
  int64_t output_size = 0;
  for (int64_t i = 0; i < linear_shape[0]; ++i) {
    CAFFE_ENFORCE(
        linear_shape[1] >= k_data[i],
        "k should not be greater than last dim, error at index ",
        i,
        ", with value: ",
        k_data[i]);
    CAFFE_ENFORCE(
        k_data[i] > 0,
        "k should be greater than 0, error at index ",
        i,
        ", with value: ",
        k_data[i]);
    output_size += k_data[i];
  }
  auto* values = Output(0, {output_size}, at::dtype<T>());
  auto* indices = Output(1, {output_size}, at::dtype<int64_t>());
  T* values_data = values->template mutable_data<T>();
  int64_t* indices_data = indices->template mutable_data<int64_t>();

  int64_t output_offset = 0;
  // Select the top k_data[i] elements of each slice, preserving indices.
  for (int64_t i = 0; i < linear_shape[0]; ++i) {
    // Build a min-heap; each heap element is a (value, idx) pair, and the
    // top of the heap is the smallest retained value.
    std::priority_queue<
        std::pair<T, int64_t>,
        std::vector<std::pair<T, int64_t>>,
        ValueCmp<T>>
        PQ;

    int64_t k_ = k_data[i];
    for (int64_t j = 0; j < linear_shape[1]; ++j) {
      const T value = input_data[i * linear_shape[1] + j];
      // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
      if (PQ.size() < k_ || value > PQ.top().first) {
        PQ.push(std::make_pair(value, j));
      }
      // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
      if (PQ.size() > k_) {
        PQ.pop();
      }
    }
    // Drain the heap smallest-first, writing back to front so the output
    // ends up in descending order of value.
    for (int64_t j = 0; j < k_; ++j) {
      auto& pqElem = PQ.top();
      values_data[output_offset + k_ - j - 1] = pqElem.first;
      indices_data[output_offset + k_ - j - 1] = pqElem.second;
      PQ.pop();
    }
    output_offset += k_;
  }

  return true;
}
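
// A minimal, self-contained sketch of the bounded min-heap selection used in
// RunOnDevice above, for a single slice stored in a plain std::vector.
// `TopKSketch` is a hypothetical helper added here for illustration only; it
// is not part of the operator, and it assumes 0 < k <= v.size().
template <typename T>
std::vector<std::pair<T, int64_t>> TopKSketch(
    const std::vector<T>& v,
    int64_t k) {
  std::priority_queue<
      std::pair<T, int64_t>,
      std::vector<std::pair<T, int64_t>>,
      ValueCmp<T>>
      pq;
  for (int64_t j = 0; j < static_cast<int64_t>(v.size()); ++j) {
    // Consider v[j] only if the heap is not full yet or v[j] beats the
    // smallest retained value.
    if (static_cast<int64_t>(pq.size()) < k || v[j] > pq.top().first) {
      pq.push(std::make_pair(v[j], j));
    }
    if (static_cast<int64_t>(pq.size()) > k) {
      pq.pop(); // evict the current smallest
    }
  }
  // Drain smallest-first, filling the result back to front so it ends up in
  // descending order of value.
  std::vector<std::pair<T, int64_t>> out(k);
  for (int64_t j = k - 1; j >= 0; --j) {
    out[j] = pq.top();
    pq.pop();
  }
  return out;
}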
template <typename T, class Context>
bool FlexibleTopKGradientOp<T, Context>::RunOnDevice() {
  auto& original_input = Input(0);
  auto& k = Input(1);
  auto& values = Input(2);
  auto& indices = Input(3);

  const int64_t* k_data = k.template data<int64_t>();
  const T* values_data = values.template data<T>();
  const int64_t* indices_data = indices.template data<int64_t>();

  // Resize the output tensor to the original_input size and initialize it
  // with zeros.
  CAFFE_ENFORCE_GT(original_input.dim(), 0);
  vector<int64_t> original_dims = original_input.sizes().vec();
  auto* output = Output(0, original_dims, at::dtype<T>());
  T* output_data = output->template mutable_data<T>();
  math::Set<T, Context>(
      output->numel(), static_cast<T>(0), output_data, &context_);

  // Scatter each incoming gradient value back to the position it was
  // selected from in the forward pass.
  int64_t index_offset = 0;
  for (int64_t i = 0; i < k.numel(); ++i) {
    // offset of output_data
    int64_t output_offset = i * original_dims.back();
    for (int64_t j = 0; j < k_data[i]; ++j) {
      int64_t index = indices_data[index_offset + j];
      T value = values_data[index_offset + j];
      output_data[output_offset + index] = value;
    }
    index_offset += k_data[i];
  }

  return true;
}
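
// A concrete illustration of the scatter above (assumed numbers, for
// illustration only): with original_input of shape [2, 3], K = [[2], [1]],
// incoming values gradient [g0, g1, g2], and forward indices [0, 2, 2], the
// output gradient is [[g0, 0, g1], [0, 0, g2]].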
REGISTER_CPU_OPERATOR(FlexibleTopK, FlexibleTopKOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    FlexibleTopKGradient,
    FlexibleTopKGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(FlexibleTopK)
    .NumInputs(2)
    .NumOutputs(2)
    .SetDoc(R"DOC(
Given two tensors, X and K, retrieve the top K[..., 1] elements from X
along the last dimension.
X is an input tensor of shape [a_1, a_2, ..., a_n, r].
K is an input tensor of shape [a_1, a_2, ..., a_n, 1],
where for each element, r >= K[..., 1] > 0.
Produce two outputs:
- a flattened values tensor of shape [ \sum_i K[i, 1] ], which contains the
  values of the top K[..., 1] elements along the last dimension;
- a flattened indices tensor of shape [ \sum_i K[i, 1] ], which contains the
  indices of the top K[..., 1] elements within the last dimension of the
  input tensor.
These two outputs should be used together with the input K, so that we know
which indices in X were picked.
Given two equivalent values, this operator uses the indices along the last
dimension as a tiebreaker; that is, the element with the lower index appears
first.
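For example, with X = [[3, 1, 2], [5, 4, 6]] and K = [[2], [1]], the outputs
are values = [3, 2, 6] and indices = [0, 2, 2].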
)DOC")
.Input(0, "X", "Tensor of shape [a_1, a_2, ..., a_n, r]")
.Input(1, "K", "Tensor of shape [a_1, a_2, ..., a_n, 1]")
.Output(
0,
"Flatten values",
"Tensor of shape [ \\sum_i K[i, 1] ] containing"
" top K[..., 1] values from the input tensor")
.Output(
1,
"Flatten indices",
"Tensor of shape [ \\sum_i K[i, 1] ] containing the indices "
"into the flatten input");
OPERATOR_SCHEMA(FlexibleTopKGradient).NumInputs(4).NumOutputs(1);
class GetFlexibleTopKGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
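    // Pass the gradient op its inputs: X (I(0)), K (I(1)), the gradient of
    // the values output (GO(0)), and the forward indices output (O(1)); its
    // single output is the gradient w.r.t. X (GI(0)).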
    return SingleGradientDef(
        "FlexibleTopKGradient",
        "",
        vector<string>{I(0), I(1), GO(0), O(1)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(FlexibleTopK, GetFlexibleTopKGradient);
} // namespace caffe2