Argmax(cpp wrapper) #784
Conversation
ctests/test_triton_argmax.cpp (outdated)

#include "flag_gems/operators.h"
#include "torch/torch.h"

TEST(reduction_op_test, argmax) {
It's better to test with larger shapes and with different dtypes.
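As a rough sketch of what that could look like (the flag_gems::argmax entry point and its torch-like signature are assumptions based on this PR, not a confirmed API):

#include "flag_gems/operators.h"
#include "torch/torch.h"
#include <gtest/gtest.h>

// Sketch: exercise a larger shape across several dtypes and compare
// against the ATen reference. flag_gems::argmax(input) is assumed here.
TEST(reduction_op_test, argmax_large_shapes_and_dtypes) {
  const torch::Device device(torch::kCUDA, 0);
  for (const auto dtype : {torch::kFloat32, torch::kFloat16, torch::kBFloat16}) {
    torch::Tensor input =
        torch::randn({1024, 1024}, torch::dtype(dtype).device(device));
    torch::Tensor out_torch = torch::argmax(input);
    torch::Tensor out_triton = flag_gems::argmax(input);  // assumed entry point
    EXPECT_TRUE(torch::equal(out_torch, out_triton));
  }
}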
ctests/test_triton_argmax.cpp (outdated)

TEST(reduction_op_test, argmax_keepdim_option) {
  const torch::Device device(torch::kCUDA, 0);
  torch::Tensor input = torch::randn({2, 2, 2, 2}, device);
ditto, the test shape is too small
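A sketch of the same keepdim test with a non-trivial shape, again assuming flag_gems::argmax mirrors torch::argmax(self, dim, keepdim):

TEST(reduction_op_test, argmax_keepdim_option) {
  const torch::Device device(torch::kCUDA, 0);
  torch::Tensor input = torch::randn({64, 128, 256}, device);
  torch::Tensor out_torch = torch::argmax(input, /* dim = */ 1, /* keepdim = */ true);
  torch::Tensor out_triton = flag_gems::argmax(input, 1, true);  // assumed signature
  // keepdim should retain the reduced dim as size 1
  EXPECT_EQ(out_triton.sizes().vec(), std::vector<int64_t>({64, 1, 256}));
  EXPECT_TRUE(torch::equal(out_torch, out_triton));
}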
lib/argmax.cpp (outdated)

auto shape = self.sizes().vec();
for (auto &s : shape) {
  s = 1;
}
Suggested change:
-  auto shape = self.sizes().vec();
-  for (auto &s : shape) {
-    s = 1;
-  }
+  const auto shape = std::vector<int64_t>(self.dim(), 1);
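Constructing the all-ones shape in one expression states the intent directly (every dim of the reduced output collapses to 1) and drops the mutate-in-a-loop pattern.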
lib/argmax.cpp
Outdated
c10::DeviceGuard guard(self.device()); | ||
c10::cuda::CUDAStream stream = c10::cuda::getCurrentCUDAStream(); | ||
|
||
f1(stream, mid_size, 1, 1, 4 /*num_warps*/, 2 /*num_stages*/, self, mid_value, mid_index, M, block_size); |
Suggested change:
-  f1(stream, mid_size, 1, 1, 4 /*num_warps*/, 2 /*num_stages*/, self, mid_value, mid_index, M, block_size);
+  f1(stream, mid_size, 1, 1, /* num_warps = */ 4, /* num_stages = */ 2, self, mid_value, mid_index, M, block_size);
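Placing the comment before the value, as in /* num_warps = */ 4, follows the argument-comment style that tools such as clang-tidy's bugprone-argument-comment check can match against the actual parameter names; trailing comments like 4 /*num_warps*/ are invisible to such checks.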
lib/argmax.cpp (outdated)

f1(stream, mid_size, 1, 1, 4 /*num_warps*/, 2 /*num_stages*/, self, mid_value, mid_index, M, block_size);

f2(stream, 1, 1, 1, 4 /*num_warps*/, 2 /*num_stages*/, mid_value, mid_index, out, mid_size, block_mid);
ditto
lib/argmax.cpp (outdated)

int64_t dim_val = dim.value();
dim_val = at::maybe_wrap_dim(dim_val, self.dim());

auto shape = self.sizes();
Suggested change:
-  auto shape = self.sizes();
+  const auto& shape = self.sizes();
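Two notes on this block. Tensor::sizes() returns a c10::IntArrayRef, a non-owning view over the tensor's size array, so binding it as const auto& makes the borrowed, view-like nature explicit (plain auto would still only copy the view, not the data). And at::maybe_wrap_dim is what normalizes negative dims before indexing, e.g.:

// maybe_wrap_dim maps a negative dim into [0, ndim):
// at::maybe_wrap_dim(-1, /* dim_post_expr = */ 3) returns 2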
LGTM
PR Category
Operator

Type of Change
Refactor

Description
C++ wrapper for the argmax operator.

Issue
Progress
Performance