Skip to content

Commit b0035f8

Browse files
committed
Test performance and correctness checks and reassoc=true
F32: Correctness: Pass; Benchmark: Fails (PM issue, run_benchmarks=true). BF16: Correctness: Fails; Benchmarks: Pass (but this might not be correct, as the results are wrong). Run commands: python run.py delete_out_reduction $IREE_DIR --xrt_dir=$XRT_DIR --peano_dir=$PEANO_DIR \ --target_device="npu4" --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \ --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS --tests Reduction
1 parent b442141 commit b0035f8

File tree

4 files changed

+18
-17
lines changed

4 files changed

+18
-17
lines changed

build_tools/ci/cpu_comparison/run.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,9 +1039,6 @@ def generate_aie_output(config, aie_vmfb, input_args, function_name, name, outpu
10391039
shell_out(config.reset_npu_script, verbose=config.verbose)
10401040

10411041
start = time.monotonic_ns()
1042-
print(f"Run command iree_run_exe: {run_args}")
1043-
print(f"Run command iree_run_exe: {test_dir}")
1044-
print(f"Run command iree_run_exe: {config.verbose}")
10451042
shell_out(run_args, test_dir, config.verbose)
10461043
run_time = time.monotonic_ns() - start
10471044

@@ -1478,7 +1475,7 @@ def aie_vs_baseline(
14781475
name,
14791476
output_type,
14801477
)
1481-
print(f"SAM: {aie_output}")
1478+
14821479
summary_string = compare(baseline_value, aie_output, rtol, atol)
14831480
if summary_string:
14841481
print(summary_string)
@@ -2489,13 +2486,14 @@ def __init__(self):
24892486
)
24902487

24912488
# Reduction op tests:
2492-
for data_type in ["bf16", "f32"]:
2489+
for data_type in ["bf16"]:
2490+
custom_input = 1.0 * np.ones((8, 512), dtype=np.float16) # bf16
2491+
# custom_input = 1.0 * np.ones((8, 512), dtype=np.float32) # f32
24932492
self.register(
24942493
Reduction(
24952494
file_base_name=f"reduction_sum_{data_type}",
24962495
function_name=f"reduction_sum",
24972496
test_params=TestParams(
2498-
name_suffix=data_type, # used in final test name
24992497
tile_pipeline="general-copy",
25002498
run_on_target=["npu4"],
25012499
use_chess=False,
@@ -2507,10 +2505,8 @@ def __init__(self):
25072505
lower_to_aie_pipeline="objectFifo",
25082506
n_repeats=1,
25092507
n_kernel_runs=1,
2510-
aie_compilation_flags=[
2511-
"--iree-amdaie-num-rows=4",
2512-
"--iree-amdaie-num-cols=4",
2513-
],
2508+
preset_inputs={1: custom_input},
2509+
aie_compilation_flags=["--iree-hal-target-backends=amd-aie"],
25142510
),
25152511
)
25162512
)

build_tools/ci/cpu_comparison/test_files/reduction_sum_bf16.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
// These lines are required for e2e numerical testing:
2-
// input 8x1024xbf16
2+
// input 8x512xbf16
33
// output 8xbf16
44

55
// Constraints:<D0xD1>
66
// D0 = [8, no-limit]
77
// D1 = [16, 1024]
88

9-
!in_ty = tensor<8x1024xbf16>
9+
!in_ty = tensor<8x512xbf16>
1010
!out_ty = tensor<8xbf16>
1111

1212
func.func @reduction_sum(%arg0: !in_ty) -> !out_ty {

build_tools/ci/cpu_comparison/test_files/reduction_sum_f32.mlir

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
// These lines are required for e2e numerical testing:
2-
// input 1024x256xf32
3-
// output 1024xf32
2+
// input 8x512xf32
3+
// output 8xf32
44

55
// Constraints:<D0xD1>
6+
// Format: [Min, Max]
67
// D0 = [2, no-limit]
78
// D1 = [2, 256]
8-
!in_ty = tensor<1024x256xf32>
9-
!out_ty = tensor<1024xf32>
9+
!in_ty = tensor<8x512xf32>
10+
!out_ty = tensor<8xf32>
1011

1112
func.func @reduction_sum(%arg0: !in_ty) -> !out_ty {
1213
%cst = arith.constant 0.0 : f32

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,11 @@ void addMLIRAIELoweringPasses(OpPassManager &pm,
924924
pm.addPass(createCanonicalizerPass());
925925
pm.addPass(createCSEPass());
926926
pm.addPass(aievec::createConvertAIEVecToLLVMPass());
927-
pm.addPass(createConvertVectorToLLVMPass());
927+
{
928+
ConvertVectorToLLVMPassOptions opts{};
929+
opts.reassociateFPReductions = true;
930+
pm.addPass(createConvertVectorToLLVMPass(opt));
931+
}
928932
pm.addPass(memref::createExpandStridedMetadataPass());
929933
pm.addPass(createLowerAffinePass());
930934
pm.addPass(createConvertMathToLLVMPass());

0 commit comments

Comments
 (0)