Skip to content

Commit bb54553

Browse files
Signed-off-by: Abhishek Varma <[email protected]>
1 parent b5dee68 commit bb54553

File tree

7 files changed

+54
-41
lines changed

7 files changed

+54
-41
lines changed

build_tools/ci/cpu_comparison/run.py

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2493,29 +2493,27 @@ def __init__(self):
24932493
# Note: The error tolerance for npu4 is higher than that for npu1_4col.
24942494
# npu1_4col uses a lookup table to compute exponentials,
24952495
# whereas npu4 uses a native exp2 instruction, which is less accurate.
2496-
# TODO: Disable till iree-org/iree issue https://github.com/iree-org/iree/issues/21633
2497-
# gets fixed.
2498-
# for target, rtol in [["npu1_4col", 4e-2], ["npu4", 8e-2]]:
2499-
# for run_benchmark in [False, True]:
2500-
# self.register(
2501-
# Softmax(
2502-
# 8192,
2503-
# 1024,
2504-
# "bf16",
2505-
# test_params=TestParams(
2506-
# run_on_target=target,
2507-
# name_suffix=target,
2508-
# use_chess=True,
2509-
# use_chess_for_ukernel=True,
2510-
# use_ukernel=True,
2511-
# tile_pipeline="general-copy",
2512-
# run_benchmark=run_benchmark,
2513-
# n_repeats=2,
2514-
# n_kernel_runs=100,
2515-
# rtol=rtol,
2516-
# ),
2517-
# )
2518-
# )
2496+
for target, rtol in [["npu1_4col", 4e-2], ["npu4", 8e-2]]:
2497+
for run_benchmark in [False, True]:
2498+
self.register(
2499+
Softmax(
2500+
8192,
2501+
1024,
2502+
"bf16",
2503+
test_params=TestParams(
2504+
run_on_target=target,
2505+
name_suffix=target,
2506+
use_chess=True,
2507+
use_chess_for_ukernel=True,
2508+
use_ukernel=True,
2509+
tile_pipeline="general-copy",
2510+
run_benchmark=run_benchmark,
2511+
n_repeats=2,
2512+
n_kernel_runs=100,
2513+
rtol=rtol,
2514+
),
2515+
)
2516+
)
25192517

25202518
# Reduction op tests:
25212519
self.register(

runtime/src/iree-amd-aie/driver/xrt-lite/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ iree_cc_library(
4040
iree::base
4141
iree::base::core_headers
4242
iree::hal::utils::deferred_command_buffer
43+
iree::hal::utils::queue_emulation
4344
iree::hal::utils::semaphore_base
4445
iree::base::internal::flatcc::parsing
4546
iree-amd-aie::schemas::pdi_executable_def_c_fbs

runtime/src/iree-amd-aie/driver/xrt-lite/device.cc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "iree-amd-aie/driver/xrt-lite/util.h"
1616
#include "iree/hal/utils/deferred_command_buffer.h"
1717
#include "iree/hal/utils/deferred_work_queue.h"
18+
#include "iree/hal/utils/queue_emulation.h"
1819

1920
#define ARENA_BLOCK_SIZE (32 * 1024)
2021

@@ -98,8 +99,9 @@ static iree_status_t iree_hal_xrt_lite_device_create_command_buffer(
9899
}
99100

100101
static iree_status_t iree_hal_xrt_lite_device_create_semaphore(
101-
iree_hal_device_t* base_device, uint64_t initial_value,
102-
iree_hal_semaphore_flags_t flags, iree_hal_semaphore_t** out_semaphore) {
102+
iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
103+
uint64_t initial_value, iree_hal_semaphore_flags_t flags,
104+
iree_hal_semaphore_t** out_semaphore) {
103105
IREE_TRACE_ZONE_BEGIN(z0);
104106

105107
iree_hal_xrt_lite_device* device = IREE_HAL_XRT_LITE_CHECKED_VTABLE_CAST(
@@ -190,8 +192,9 @@ static iree_status_t iree_hal_xrt_lite_device_queue_alloca(
190192
iree_hal_xrt_lite_device* device = IREE_HAL_XRT_LITE_CHECKED_VTABLE_CAST(
191193
base_device, iree_hal_xrt_lite_device_vtable, iree_hal_xrt_lite_device);
192194
IREE_RETURN_AND_END_ZONE_IF_ERROR(
193-
z0, iree_hal_semaphore_list_wait(wait_semaphore_list,
194-
iree_infinite_timeout()));
195+
z0,
196+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
197+
IREE_HAL_WAIT_FLAG_DEFAULT));
195198
IREE_RETURN_AND_END_ZONE_IF_ERROR(
196199
z0, iree_hal_allocator_allocate_buffer(device->device_allocator, params,
197200
allocation_size, out_buffer));
@@ -207,8 +210,9 @@ static iree_status_t iree_hal_xrt_lite_device_queue_dealloca(
207210
const iree_hal_semaphore_list_t wait_semaphore_list,
208211
const iree_hal_semaphore_list_t signal_semaphore_list,
209212
iree_hal_buffer_t* buffer, iree_hal_alloca_flags_t flags) {
210-
IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_wait(wait_semaphore_list,
211-
iree_infinite_timeout()));
213+
IREE_RETURN_IF_ERROR(
214+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
215+
IREE_HAL_WAIT_FLAG_DEFAULT));
212216
iree_status_t status = iree_hal_semaphore_list_signal(signal_semaphore_list);
213217
return status;
214218
}

runtime/src/iree-amd-aie/driver/xrt-lite/direct_command_buffer.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,11 @@ iree_status_t iree_hal_xrt_lite_direct_command_buffer_create(
6464
command_buffer->host_allocator = host_allocator;
6565
command_buffer->device = device;
6666
iree_arena_initialize(block_pool, &command_buffer->arena);
67-
iree_status_t status =
68-
iree_hal_resource_set_allocate(block_pool, &command_buffer->resource_set);
67+
iree_status_t status = iree_ok_status();
68+
if (!iree_all_bits_set(mode, IREE_HAL_COMMAND_BUFFER_MODE_UNRETAINED)) {
69+
status = iree_hal_resource_set_allocate(block_pool,
70+
&command_buffer->resource_set);
71+
}
6972
if (iree_status_is_ok(status)) {
7073
*out_command_buffer = &command_buffer->base;
7174
} else {

runtime/src/iree-amd-aie/driver/xrt/direct_command_buffer.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,11 @@ iree_status_t iree_hal_xrt_direct_command_buffer_create(
8282
&iree_hal_xrt_direct_command_buffer_vtable, &command_buffer->base);
8383
command_buffer->host_allocator = host_allocator;
8484
iree_arena_initialize(block_pool, &command_buffer->arena);
85-
iree_status_t status =
86-
iree_hal_resource_set_allocate(block_pool, &command_buffer->resource_set);
85+
iree_status_t status = iree_ok_status();
86+
if (!iree_all_bits_set(mode, IREE_HAL_COMMAND_BUFFER_MODE_UNRETAINED)) {
87+
status = iree_hal_resource_set_allocate(block_pool,
88+
&command_buffer->resource_set);
89+
}
8790
if (iree_status_is_ok(status)) {
8891
*out_command_buffer = &command_buffer->base;
8992
} else {

runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,9 @@ static iree_status_t iree_hal_xrt_device_import_file(
230230
}
231231

232232
static iree_status_t iree_hal_xrt_device_create_semaphore(
233-
iree_hal_device_t* base_device, uint64_t initial_value,
234-
iree_hal_semaphore_flags_t flags, iree_hal_semaphore_t** out_semaphore) {
233+
iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
234+
uint64_t initial_value, iree_hal_semaphore_flags_t flags,
235+
iree_hal_semaphore_t** out_semaphore) {
235236
iree_hal_xrt_device_t* device = iree_hal_xrt_device_cast(base_device);
236237
return iree_hal_xrt_semaphore_create(device->host_allocator, initial_value,
237238
out_semaphore);
@@ -251,8 +252,9 @@ static iree_status_t iree_hal_xrt_device_queue_alloca(
251252
iree_device_size_t allocation_size, iree_hal_execute_flags_t flags,
252253
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
253254
// TODO: queue-ordered allocations.
254-
IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_wait(wait_semaphore_list,
255-
iree_infinite_timeout()));
255+
IREE_RETURN_IF_ERROR(
256+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
257+
IREE_HAL_WAIT_FLAG_DEFAULT));
256258
IREE_RETURN_IF_ERROR(
257259
iree_hal_allocator_allocate_buffer(iree_hal_device_allocator(base_device),
258260
params, allocation_size, out_buffer));
@@ -265,8 +267,9 @@ static iree_status_t iree_hal_xrt_device_queue_dealloca(
265267
const iree_hal_semaphore_list_t wait_semaphore_list,
266268
const iree_hal_semaphore_list_t signal_semaphore_list,
267269
iree_hal_buffer_t* buffer, iree_hal_alloca_flags_t flags) {
268-
IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_wait(wait_semaphore_list,
269-
iree_infinite_timeout()));
270+
IREE_RETURN_IF_ERROR(
271+
iree_hal_semaphore_list_wait(wait_semaphore_list, iree_infinite_timeout(),
272+
IREE_HAL_WAIT_FLAG_DEFAULT));
270273
iree_status_t status = iree_hal_semaphore_list_signal(signal_semaphore_list);
271274
return status;
272275
}
@@ -352,7 +355,8 @@ static iree_status_t iree_hal_xrt_device_queue_flush(
352355

353356
static iree_status_t iree_hal_xrt_device_wait_semaphores(
354357
iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
355-
const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout) {
358+
const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout,
359+
iree_hal_wait_flags_t flags) {
356360
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
357361
"Unimplemented semaphore wait");
358362
}

third_party/iree

Submodule iree updated 232 files

0 commit comments

Comments
 (0)