Skip to content

Commit 0dc6aa2

Browse files
committed
[UR] Implement urCommandBufferAppendKernelLaunchWithArgsExp for HIP
Signed-off-by: Lukasz Dorau <[email protected]>
1 parent 546016e commit 0dc6aa2

File tree

1 file changed

+67
-16
lines changed

1 file changed

+67
-16
lines changed

unified-runtime/source/adapters/hip/command_buffer.cpp

Lines changed: 67 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include "kernel.hpp"
1919
#include "memory.hpp"
2020
#include "queue.hpp"
21+
#include "sampler.hpp"
2122

2223
#include <cstring>
2324

@@ -305,22 +306,72 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
305306

306307
// urCommandBufferAppendKernelLaunchWithArgsExp (HIP adapter)
//
// This hunk shows both versions of the function. The removed ('-') lines are
// the previous stub, which ignored every parameter and returned
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. The added ('+') lines are the new
// implementation, which:
//   1. checks its preconditions (see the UR_ASSERT calls below),
//   2. applies each of the numArgs entries in pArgs to hKernel, and
//   3. forwards the launch to urCommandBufferAppendKernelLaunchExp and
//      returns that call's result.
//
// Error results produced directly by the new implementation:
//   - UR_RESULT_ERROR_INVALID_OPERATION      phCommand is non-null but
//                                            hCommandBuffer->IsUpdatable is
//                                            false.
//   - UR_RESULT_ERROR_INVALID_KERNEL         hKernel->getContext() differs
//                                            from hCommandBuffer->Context.
//   - UR_RESULT_ERROR_INVALID_WORK_DIMENSION workDim is 0 or greater than 3.
//   - UR_RESULT_ERROR_INVALID_ENUMERATION    pArgs[i].type is not one of the
//                                            five handled argument types.
//   - any ur_result_t thrown while the arguments are being set.
//
// The numEventsInWaitList, phEventWaitList and phEvent parameters are unnamed
// in the new signature and are not forwarded to the delegated call.
UR_APIEXPORT ur_result_t UR_APICALL
307308
urCommandBufferAppendKernelLaunchWithArgsExp(
308-
ur_exp_command_buffer_handle_t /* hCommandBuffer */,
309-
ur_kernel_handle_t /* hKernel */, uint32_t /* workDim */,
310-
const size_t * /* pGlobalWorkOffset */,
311-
const size_t * /* pGlobalWorkSize */, const size_t * /* pLocalWorkSize */,
312-
uint32_t /* numArgs */, const ur_exp_kernel_arg_properties_t * /* pArgs */,
313-
uint32_t /* numKernelAlternatives */,
314-
ur_kernel_handle_t * /* phKernelAlternatives */,
315-
uint32_t /* numSyncPointsInWaitList */,
316-
const ur_exp_command_buffer_sync_point_t * /* pSyncPointWaitList */,
317-
uint32_t /* numEventsInWaitList */,
318-
const ur_event_handle_t * /* phEventWaitList */,
319-
ur_exp_command_buffer_sync_point_t * /* pSyncPoint */,
320-
ur_event_handle_t * /* phEvent */,
321-
ur_exp_command_buffer_command_handle_t * /* phCommand */) {
322-
// TODO LDORAU HIP
323-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
309+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
310+
uint32_t workDim, const size_t *pGlobalWorkOffset,
311+
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
312+
uint32_t numArgs, const ur_exp_kernel_arg_properties_t *pArgs,
313+
uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives,
314+
uint32_t numSyncPointsInWaitList,
315+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
316+
uint32_t /*numEventsInWaitList*/,
317+
const ur_event_handle_t * /*phEventWaitList*/,
318+
ur_exp_command_buffer_sync_point_t *pSyncPoint,
319+
ur_event_handle_t * /*phEvent*/,
320+
ur_exp_command_buffer_command_handle_t *phCommand) {
321+
// Preconditions
322+
// Command handles can only be obtained from updatable command-buffers
323+
UR_ASSERT(!(phCommand && !hCommandBuffer->IsUpdatable),
324+
UR_RESULT_ERROR_INVALID_OPERATION);
325+
// The kernel must belong to the same context as the command-buffer.
UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(),
326+
UR_RESULT_ERROR_INVALID_KERNEL);
327+
// Only work dimensions 1, 2 and 3 are accepted.
UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
328+
UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
329+
330+
try {
331+
// Apply every supplied argument to hKernel, dispatching on its declared type.
for (uint32_t i = 0; i < numArgs; i++) {
332+
switch (pArgs[i].type) {
333+
case UR_EXP_KERNEL_ARG_TYPE_LOCAL: {
334+
hKernel->setKernelLocalArg(pArgs[i].index, pArgs[i].size);
335+
break;
336+
}
337+
case UR_EXP_KERNEL_ARG_TYPE_VALUE: {
338+
hKernel->setKernelArg(pArgs[i].index, pArgs[i].size,
339+
pArgs[i].value.value);
340+
break;
341+
}
342+
case UR_EXP_KERNEL_ARG_TYPE_POINTER: {
343+
// setKernelArg is expecting a pointer to our argument
344+
hKernel->setKernelArg(pArgs[i].index, pArgs[i].size,
345+
&pArgs[i].value.pointer);
346+
break;
347+
}
348+
case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: {
349+
// Memory objects are routed through urKernelSetArgMemObj, with the tuple's
// flags wrapped in a properties struct.
ur_kernel_arg_mem_obj_properties_t Props = {
350+
UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr,
351+
pArgs[i].value.memObjTuple.flags};
352+
// NOTE(review): UR_CALL is defined outside this hunk; presumably it returns
// early on a non-success result - confirm.
UR_CALL(urKernelSetArgMemObj(hKernel, pArgs[i].index, &Props,
353+
pArgs[i].value.memObjTuple.hMem));
354+
break;
355+
}
356+
case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: {
357+
// The sampler is passed to the kernel as its 32-bit Props value.
// NOTE(review): SamplerProps is local to this case; presumably setKernelArg
// copies the bytes before returning - confirm.
uint32_t SamplerProps = pArgs[i].value.sampler->Props;
358+
hKernel->setKernelArg(pArgs[i].index, sizeof(uint32_t),
359+
(void *)&SamplerProps);
360+
break;
361+
}
362+
default:
363+
return UR_RESULT_ERROR_INVALID_ENUMERATION;
364+
}
365+
}
366+
// A ur_result_t thrown while setting arguments becomes the return value.
} catch (ur_result_t Err) {
367+
return Err;
368+
}
369+
370+
// Delegate the append itself. The event wait list and the output event are
// not forwarded: 0 / nullptr are passed in their place.
return urCommandBufferAppendKernelLaunchExp(
371+
hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
372+
pLocalWorkSize, numKernelAlternatives, phKernelAlternatives,
373+
numSyncPointsInWaitList, pSyncPointWaitList, 0 /*numEventsInWaitList*/,
374+
nullptr /*phEventWaitList*/, pSyncPoint, nullptr /*phEvent*/, phCommand);
324375
}
325376

326377
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(

0 commit comments

Comments
 (0)