Skip to content

Commit 4a8ccb3

Browse files
CUDA: no -sm row for very small matrices (ggml-org#10185)
1 parent 2a82891 commit 4a8ccb3

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

ggml/src/ggml-cuda.cu

+11
Original file line number | Diff line number | Diff line change
@@ -2978,6 +2978,17 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
29782978
{
29792979
struct ggml_tensor * a = op->src[0];
29802980
struct ggml_tensor * b = op->src[1];
2981+
// for small weight matrices the active device can end up without any rows; in those cases, don't use row split
2982+
// this avoids some edge cases (and the performance would not be good anyway)
2983+
if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) {
2984+
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context;
2985+
int64_t row_low;
2986+
int64_t row_high;
2987+
get_row_split(&row_low, &row_high, a, buft_ctx->tensor_split, dev_ctx->device);
2988+
if (row_low == row_high) {
2989+
return false;
2990+
}
2991+
}
29812992
if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) {
29822993
return false;
29832994
}

0 commit comments

Comments
 (0)