Skip to content

Commit 5f506ce

Browse files
Signed-off-by: Abhishek Varma <[email protected]>
1 parent b5dee68 commit 5f506ce

File tree

4 files changed

+39
-35
lines changed

4 files changed

+39
-35
lines changed

build_tools/ci/cpu_comparison/run.py

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2493,29 +2493,27 @@ def __init__(self):
24932493
# Note: The error tolerance for npu4 is higher than that for npu1_4col.
24942494
# npu1_4col uses a lookup table to compute exponentials,
24952495
# whereas npu4 uses a native exp2 instruction, which is less accurate.
2496-
# TODO: Disable till iree-org/iree issue https://github.com/iree-org/iree/issues/21633
2497-
# gets fixed.
2498-
# for target, rtol in [["npu1_4col", 4e-2], ["npu4", 8e-2]]:
2499-
# for run_benchmark in [False, True]:
2500-
# self.register(
2501-
# Softmax(
2502-
# 8192,
2503-
# 1024,
2504-
# "bf16",
2505-
# test_params=TestParams(
2506-
# run_on_target=target,
2507-
# name_suffix=target,
2508-
# use_chess=True,
2509-
# use_chess_for_ukernel=True,
2510-
# use_ukernel=True,
2511-
# tile_pipeline="general-copy",
2512-
# run_benchmark=run_benchmark,
2513-
# n_repeats=2,
2514-
# n_kernel_runs=100,
2515-
# rtol=rtol,
2516-
# ),
2517-
# )
2518-
# )
2496+
for target, rtol in [["npu1_4col", 4e-2], ["npu4", 8e-2]]:
2497+
for run_benchmark in [False, True]:
2498+
self.register(
2499+
Softmax(
2500+
8192,
2501+
1024,
2502+
"bf16",
2503+
test_params=TestParams(
2504+
run_on_target=target,
2505+
name_suffix=target,
2506+
use_chess=True,
2507+
use_chess_for_ukernel=True,
2508+
use_ukernel=True,
2509+
tile_pipeline="general-copy",
2510+
run_benchmark=run_benchmark,
2511+
n_repeats=2,
2512+
n_kernel_runs=100,
2513+
rtol=rtol,
2514+
),
2515+
)
2516+
)
25192517

25202518
# Reduction op tests:
25212519
self.register(

runtime/src/iree-amd-aie/driver/xrt-lite/device.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,9 @@ static iree_status_t iree_hal_xrt_lite_device_queue_alloca(
190190
iree_hal_xrt_lite_device* device = IREE_HAL_XRT_LITE_CHECKED_VTABLE_CAST(
191191
base_device, iree_hal_xrt_lite_device_vtable, iree_hal_xrt_lite_device);
192192
IREE_RETURN_AND_END_ZONE_IF_ERROR(
193-
z0, iree_hal_semaphore_list_wait(wait_semaphore_list,
194-
iree_infinite_timeout()));
193+
z0,
194+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
195+
IREE_HAL_WAIT_FLAG_DEFAULT));
195196
IREE_RETURN_AND_END_ZONE_IF_ERROR(
196197
z0, iree_hal_allocator_allocate_buffer(device->device_allocator, params,
197198
allocation_size, out_buffer));
@@ -207,8 +208,9 @@ static iree_status_t iree_hal_xrt_lite_device_queue_dealloca(
207208
const iree_hal_semaphore_list_t wait_semaphore_list,
208209
const iree_hal_semaphore_list_t signal_semaphore_list,
209210
iree_hal_buffer_t* buffer, iree_hal_alloca_flags_t flags) {
210-
IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_wait(wait_semaphore_list,
211-
iree_infinite_timeout()));
211+
IREE_RETURN_IF_ERROR(
212+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
213+
IREE_HAL_WAIT_FLAG_DEFAULT));
212214
iree_status_t status = iree_hal_semaphore_list_signal(signal_semaphore_list);
213215
return status;
214216
}

runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,9 @@ static iree_status_t iree_hal_xrt_device_import_file(
230230
}
231231

232232
static iree_status_t iree_hal_xrt_device_create_semaphore(
233-
iree_hal_device_t* base_device, uint64_t initial_value,
234-
iree_hal_semaphore_flags_t flags, iree_hal_semaphore_t** out_semaphore) {
233+
iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
234+
uint64_t initial_value, iree_hal_semaphore_flags_t flags,
235+
iree_hal_semaphore_t** out_semaphore) {
235236
iree_hal_xrt_device_t* device = iree_hal_xrt_device_cast(base_device);
236237
return iree_hal_xrt_semaphore_create(device->host_allocator, initial_value,
237238
out_semaphore);
@@ -251,8 +252,9 @@ static iree_status_t iree_hal_xrt_device_queue_alloca(
251252
iree_device_size_t allocation_size, iree_hal_execute_flags_t flags,
252253
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
253254
// TODO: queue-ordered allocations.
254-
IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_wait(wait_semaphore_list,
255-
iree_infinite_timeout()));
255+
IREE_RETURN_IF_ERROR(
256+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
257+
IREE_HAL_WAIT_FLAG_DEFAULT));
256258
IREE_RETURN_IF_ERROR(
257259
iree_hal_allocator_allocate_buffer(iree_hal_device_allocator(base_device),
258260
params, allocation_size, out_buffer));
@@ -265,8 +267,9 @@ static iree_status_t iree_hal_xrt_device_queue_dealloca(
265267
const iree_hal_semaphore_list_t wait_semaphore_list,
266268
const iree_hal_semaphore_list_t signal_semaphore_list,
267269
iree_hal_buffer_t* buffer, iree_hal_alloca_flags_t flags) {
268-
IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_wait(wait_semaphore_list,
269-
iree_infinite_timeout()));
270+
IREE_RETURN_IF_ERROR(
271+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
272+
IREE_HAL_WAIT_FLAG_DEFAULT));
270273
iree_status_t status = iree_hal_semaphore_list_signal(signal_semaphore_list);
271274
return status;
272275
}
@@ -352,7 +355,8 @@ static iree_status_t iree_hal_xrt_device_queue_flush(
352355

353356
static iree_status_t iree_hal_xrt_device_wait_semaphores(
354357
iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
355-
const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout) {
358+
const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout,
359+
iree_hal_wait_flags_t flags) {
356360
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
357361
"Unimplemented semaphore wait");
358362
}

third_party/iree

Submodule iree updated 232 files

0 commit comments

Comments
 (0)