Skip to content

Commit 5a8ae30

Browse files
authored
vulkan: automatically deduce size of push constants (#13936)
1 parent 0d39844 commit 5a8ae30

File tree

1 file changed

+54
-28
lines changed

1 file changed

+54
-28
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4079,7 +4079,33 @@ static vk_submission ggml_vk_begin_submission(vk_device& device, vk_queue& q, bo
40794079
return s;
40804080
}
40814081

4082-
static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const& descriptor_buffer_infos, size_t push_constant_size, const void* push_constants, std::array<uint32_t, 3> elements) {
4082+
// Size in bytes of a push-constant block supplied as a single struct/class
// object. ggml_vk_dispatch_pipeline passes the result as the size argument of
// pushConstants(). Non-class arguments are rejected at compile time.
template <typename T>
size_t push_constant_size(const T &t) {
    static_assert(std::is_class<T>::value, "T must be a struct/class");
    // `t` is a reference to T, so this is exactly sizeof(T); naming the
    // parameter here also keeps it referenced.
    return sizeof(t);
}
4087+
// Size in bytes of a push-constant block supplied as a std::vector:
// element size times element count (0 for an empty vector).
template <typename T>
size_t push_constant_size(const std::vector<T> &t) {
    // The parameter is read below, so it is not marked as unused.
    return sizeof(T) * t.size();
}
4091+
// Size in bytes of a push-constant block supplied as a std::array:
// element size times the array extent.
//
// N must be size_t, the type of std::array's extent parameter. If it is
// declared with any other type, template argument deduction is required to
// fail, and a conforming compiler discards this overload so the call falls
// through to the generic struct overload instead.
template <typename T, size_t N>
size_t push_constant_size(const std::array<T, N> &t) {
    return sizeof(T) * t.size();
}
4095+
4096+
// Address of a push-constant block supplied as a single struct/class object.
// ggml_vk_dispatch_pipeline passes the result as the data pointer of
// pushConstants(). Non-class arguments are rejected at compile time.
template <typename T>
const T *push_constant_data(const T &t) {
    static_assert(std::is_class<T>::value, "T must be a struct/class");
    const T *block = &t;
    return block;
}
4100+
// Pointer to the first element of a push-constant block supplied as a
// std::vector; this is whatever std::vector::data() returns.
template <typename T>
const T *push_constant_data(const std::vector<T> &t) {
    const T *first = t.data();
    return first;
}
4103+
// Pointer to the first element of a push-constant block supplied as a
// std::array.
//
// N must be size_t, the type of std::array's extent parameter. If it is
// declared with any other type, template argument deduction is required to
// fail, and a conforming compiler discards this overload so the call falls
// through to the generic struct overload, which returns a pointer to the
// array object rather than to its elements.
template <typename T, size_t N>
const T *push_constant_data(const std::array<T, N> &t) {
    return t.data();
}
4106+
4107+
template <typename T>
4108+
static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const& descriptor_buffer_infos, const T &push_constants, std::array<uint32_t, 3> elements) {
40834109
const uint32_t wg0 = CEIL_DIV(elements[0], pipeline->wg_denoms[0]);
40844110
const uint32_t wg1 = CEIL_DIV(elements[1], pipeline->wg_denoms[1]);
40854111
const uint32_t wg2 = CEIL_DIV(elements[2], pipeline->wg_denoms[2]);
@@ -4095,7 +4121,7 @@ static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context&
40954121
vk::WriteDescriptorSet write_descriptor_set{ descriptor_set, 0, 0, pipeline->parameter_count, vk::DescriptorType::eStorageBuffer, nullptr, descriptor_buffer_infos.begin() };
40964122
ctx->device->device.updateDescriptorSets({ write_descriptor_set }, {});
40974123

4098-
subctx->s->buffer.pushConstants(pipeline->layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size, push_constants);
4124+
subctx->s->buffer.pushConstants(pipeline->layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size(push_constants), push_constant_data(push_constants));
40994125
subctx->s->buffer.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->pipeline);
41004126
subctx->s->buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
41014127
pipeline->layout,
@@ -4558,18 +4584,18 @@ static void ggml_vk_matmul(
45584584
ggml_vk_sync_buffers(subctx);
45594585
if (split_k == 1) {
45604586
const vk_mat_mat_push_constants pc = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d, k, ne02, ne12, broadcast2, broadcast3, padded_n };
4561-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d }, sizeof(vk_mat_mat_push_constants), &pc, { m, n, batch });
4587+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d }, pc, { m, n, batch });
45624588
return;
45634589
}
45644590

45654591
GGML_ASSERT(batch_stride_d == m * n);
45664592

45674593
const vk_mat_mat_push_constants pc1 = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d, CEIL_DIV(k, split_k), ne02, ne12, broadcast2, broadcast3, padded_n };
45684594
// Make sure enough workgroups get assigned for split k to work
4569-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, split_k_buffer }, sizeof(vk_mat_mat_push_constants), &pc1, { (CEIL_DIV(m, pipeline->wg_denoms[0]) * pipeline->wg_denoms[0]) * split_k, n, batch });
4595+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, split_k_buffer }, pc1, { (CEIL_DIV(m, pipeline->wg_denoms[0]) * pipeline->wg_denoms[0]) * split_k, n, batch });
45704596
ggml_vk_sync_buffers(subctx);
45714597
const std::array<uint32_t, 2> pc2 = { (uint32_t)(m * n * batch), split_k };
4572-
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2.size() * sizeof(uint32_t), pc2.data(), { m * n * batch, 1, 1 });
4598+
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2, { m * n * batch, 1, 1 });
45734599
}
45744600

45754601
static vk_pipeline ggml_vk_guess_matmul_id_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, uint32_t m, uint32_t n, bool aligned, ggml_type src0_type) {
@@ -4617,7 +4643,7 @@ static void ggml_vk_matmul_id(
46174643
ggml_vk_sync_buffers(subctx);
46184644
const vk_mat_mat_id_push_constants pc = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d,
46194645
nei0, nei1, nbi1, ne11, padded_n };
4620-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d, ids }, sizeof(vk_mat_mat_id_push_constants), &pc, { m, nei1, n_as });
4646+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d, ids }, pc, { m, nei1, n_as });
46214647
}
46224648

46234649
static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) {
@@ -4738,7 +4764,7 @@ static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context&
47384764
};
47394765
init_pushconst_fastdiv(pc);
47404766
ggml_vk_sync_buffers(subctx);
4741-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof(vk_op_unary_push_constants), &pc, elements);
4767+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, pc, elements);
47424768
}
47434769

47444770
static vk_pipeline ggml_vk_get_quantize_pipeline(ggml_backend_vk_context * ctx, ggml_type type) {
@@ -4757,7 +4783,7 @@ static void ggml_vk_quantize_q8_1(ggml_backend_vk_context * ctx, vk_context& sub
47574783
vk_pipeline pipeline = ggml_vk_get_quantize_pipeline(ctx, GGML_TYPE_Q8_1);
47584784

47594785
ggml_vk_sync_buffers(subctx);
4760-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof(uint32_t), &ne, { ne, 1, 1 });
4786+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, std::array<uint32_t, 1>{ne}, { ne, 1, 1 });
47614787
}
47624788

47634789
static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -4957,7 +4983,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
49574983
} else if (qx_needs_dequant) {
49584984
const std::vector<uint32_t> pc = { (uint32_t)ne01, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)(ggml_nelements(src0)) };
49594985
ggml_vk_sync_buffers(subctx);
4960-
ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(uint32_t), pc.data(), { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
4986+
ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
49614987
}
49624988
if (y_non_contig) {
49634989
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
@@ -5173,7 +5199,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
51735199
ggml_vk_sync_buffers(subctx);
51745200
ggml_vk_dispatch_pipeline(ctx, subctx, dmmv,
51755201
{ vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 }, vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23} },
5176-
sizeof(vk_mat_vec_push_constants), &pc, { groups_x, (uint32_t)(ne12 * ne13), groups_z });
5202+
pc, { groups_x, (uint32_t)(ne12 * ne13), groups_z });
51775203
}
51785204

51795205
static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5261,7 +5287,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
52615287
}
52625288

52635289
ggml_vk_sync_buffers(subctx);
5264-
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_p021_f16_f32[gqa_ratio - 1], { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, workgroups_z });
5290+
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_p021_f16_f32[gqa_ratio - 1], { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, pc, { 1, (uint32_t)ne01, workgroups_z });
52655291
}
52665292

52675293
static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5344,7 +5370,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
53445370
const std::array<uint32_t, 9> pc = { (uint32_t)ne00, (uint32_t)ne01, row_stride_x, channel_stride_x, channel_stride_y, (uint32_t)(ne12 / ne02), (uint32_t)ne12, (uint32_t)(qy_shader_offset / ggml_type_size(src1->type)), (uint32_t)(d_shader_offset / ggml_type_size(dst->type)) };
53455371
ggml_vk_sync_buffers(subctx);
53465372
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_nc_f16_f32,
5347-
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
5373+
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
53485374
}
53495375

53505376
static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5560,7 +5586,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
55605586
const std::vector<uint32_t> pc = { (uint32_t)ne01, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)(ggml_nelements(src0)) };
55615587
ggml_vk_sync_buffers(subctx);
55625588
ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0,
5563-
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(uint32_t), pc.data(), { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
5589+
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
55645590
}
55655591
if (y_non_contig) {
55665592
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
@@ -5780,7 +5806,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
57805806
ggml_vk_dispatch_pipeline(ctx, subctx, dmmv,
57815807
{ vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 },
57825808
vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23}, vk_subbuffer{ d_ids, ids_buf_offset, ids_sz } },
5783-
sizeof(vk_mat_vec_id_push_constants), &pc, { groups_x, (uint32_t)nei0, groups_z });
5809+
pc, { groups_x, (uint32_t)nei0, groups_z });
57845810
}
57855811

57865812
static void ggml_vk_mul_mat_id(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * src2, ggml_tensor * dst, bool dryrun = false) {
@@ -6130,7 +6156,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61306156
// there's no more than one tile of rows (i.e. workgroups_x would have been
61316157
// one). We reuse workgroups_x to mean the number of splits, so we need to
61326158
// cancel out the divide by wg_denoms[0].
6133-
sizeof(vk_flash_attn_push_constants), &pc, { workgroups_x * pipeline->wg_denoms[0], workgroups_y, workgroups_z });
6159+
pc, { workgroups_x * pipeline->wg_denoms[0], workgroups_y, workgroups_z });
61346160

61356161
ggml_vk_sync_buffers(subctx);
61366162
const std::array<uint32_t, 3> pc2 = { D, (uint32_t)ne1, split_k };
@@ -6139,7 +6165,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61396165
vk_subbuffer{ctx->prealloc_split_k, 0, VK_WHOLE_SIZE},
61406166
vk_subbuffer{d_D, d_buf_offset, VK_WHOLE_SIZE},
61416167
},
6142-
pc2.size() * uint32_t{sizeof(uint32_t)}, pc2.data(), { (uint32_t)ne1, 1, 1 });
6168+
pc2, { (uint32_t)ne1, 1, 1 });
61436169
} else {
61446170
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline,
61456171
{
@@ -6149,7 +6175,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61496175
vk_subbuffer{d_M, m_buf_offset, VK_WHOLE_SIZE},
61506176
vk_subbuffer{d_D, d_buf_offset, VK_WHOLE_SIZE},
61516177
},
6152-
sizeof(vk_flash_attn_push_constants), &pc, { workgroups_x, workgroups_y, workgroups_z });
6178+
pc, { workgroups_x, workgroups_y, workgroups_z });
61536179
}
61546180
}
61556181

@@ -6827,7 +6853,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
68276853
}
68286854

68296855
ggml_vk_sync_buffers(subctx);
6830-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6856+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68316857
} else if (op == GGML_OP_ROPE || op == GGML_OP_ROPE_BACK) {
68326858
// Empty src2 is possible in rope, but the shader needs a buffer
68336859
vk_subbuffer subbuf_z;
@@ -6838,26 +6864,26 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
68386864
}
68396865

68406866
ggml_vk_sync_buffers(subctx);
6841-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6867+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68426868
} else if (op == GGML_OP_IM2COL) {
68436869
// im2col uses only src1 and dst buffers
68446870
ggml_vk_sync_buffers(subctx);
6845-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6871+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68466872
} else if (op == GGML_OP_COUNT_EQUAL) {
68476873
ggml_vk_sync_buffers(subctx);
68486874
// count_equal assumes that destination buffer is initialized with zeroes
68496875
ggml_vk_buffer_memset_async(subctx, d_D, d_buf_offset, 0, d_sz);
68506876
ggml_vk_sync_buffers(subctx);
6851-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6877+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68526878
} else if (use_src2) {
68536879
ggml_vk_sync_buffers(subctx);
6854-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_Z, z_buf_offset, z_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6880+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_Z, z_buf_offset, z_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68556881
} else if (use_src1) {
68566882
ggml_vk_sync_buffers(subctx);
6857-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6883+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68586884
} else {
68596885
ggml_vk_sync_buffers(subctx);
6860-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6886+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68616887
}
68626888
}
68636889

@@ -7026,7 +7052,7 @@ static void ggml_vk_op_f32_wkv(ggml_backend_vk_context * ctx, vk_context& subctx
70267052
vk_subbuffer{ d_srcs[4], src_offsets[4], src_sizes[4] },
70277053
vk_subbuffer{ d_srcs[5], src_offsets[5], src_sizes[5] },
70287054
vk_subbuffer{ d_D, dst_offset, dst_size }
7029-
}, sizeof(vk_op_rwkv_wkv6_push_constants), &pc, elements);
7055+
}, pc, elements);
70307056
} else if (version == 7) {
70317057
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, {
70327058
vk_subbuffer{ d_srcs[0], src_offsets[0], src_sizes[0] },
@@ -7037,7 +7063,7 @@ static void ggml_vk_op_f32_wkv(ggml_backend_vk_context * ctx, vk_context& subctx
70377063
vk_subbuffer{ d_srcs[5], src_offsets[5], src_sizes[5] },
70387064
vk_subbuffer{ d_srcs[6], src_offsets[6], src_sizes[6] },
70397065
vk_subbuffer{ d_D, dst_offset, dst_size }
7040-
}, sizeof(vk_op_rwkv_wkv7_push_constants), &pc, elements);
7066+
}, pc, elements);
70417067
} else {
70427068
// shouldn't happen
70437069
GGML_ASSERT(false);
@@ -7174,7 +7200,7 @@ static void ggml_vk_op_f32_opt_step_adamw(ggml_backend_vk_context * ctx, vk_cont
71747200
vk_subbuffer{ d_GM, gm_offset, gm_size },
71757201
vk_subbuffer{ d_GV, gv_offset, gv_size },
71767202
vk_subbuffer{ d_P, p_offset, p_size },
7177-
}, sizeof(vk_op_push_constants), &pc, elements);
7203+
}, pc, elements);
71787204
}
71797205

71807206
static void ggml_vk_opt_step_adamw(ggml_backend_vk_context * ctx, vk_context& subctx, ggml_tensor * dst, bool dryrun = false) {
@@ -8063,7 +8089,7 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
80638089
vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
80648090
ggml_vk_ctx_begin(ctx->device, subctx);
80658091
const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
8066-
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
8092+
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc, { (uint32_t)ne, 1, 1});
80678093
ggml_vk_ctx_end(subctx);
80688094

80698095
auto begin = std::chrono::high_resolution_clock::now();

0 commit comments

Comments
 (0)