Skip to content

Commit a11e419

Browse files
add test and minor fixes
1 parent 3d864d9 commit a11e419

File tree

3 files changed

+38
-14
lines changed

3 files changed

+38
-14
lines changed

build_tools/ci/cpu_comparison/run_test.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,9 @@ function run_test() {
303303
run_test \
304304
--test_file ${THIS_DIR}/test_files/matmul_int32.mlir
305305

306+
run_test \
307+
--test_file ${THIS_DIR}/test_files/three_matmuls.mlir
308+
306309
run_test \
307310
--name_prefix "matmul" \
308311
--lhs_rhs_type "bf16" \
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// This test shows arbitrary matmuls that would have producer-consumer relationships
2+
// across different dispatches running on CI.
3+
4+
// These lines are strictly required by the script which generates input data:
5+
//
6+
// input 32x32xf32
7+
// input 32x32xf32
8+
// input 32x4xf32
9+
// input 4x32xf32
10+
11+
!A_TYPE = tensor<32x32xf32>
12+
!B_TYPE = tensor<32x4xf32>
13+
!C_TYPE = tensor <4x32xf32>
14+
!D_TYPE = tensor <4x4xf32>
15+
func.func @two_mm(%lhs : !A_TYPE,
16+
%rhs : !A_TYPE, %rhs_2 : !B_TYPE, %lhs_2 : !C_TYPE) -> !D_TYPE {
17+
%empty = tensor.empty() : !A_TYPE
18+
%empty_2 = tensor.empty() : !B_TYPE
19+
%empty_3 = tensor.empty() : !D_TYPE
20+
%cst = arith.constant 0.0 : f32
21+
%fill = linalg.fill ins(%cst : f32) outs(%empty : !A_TYPE) -> !A_TYPE
22+
%fill_2 = linalg.fill ins(%cst : f32) outs(%empty_2 : !B_TYPE) -> !B_TYPE
23+
%fill_3 = linalg.fill ins(%cst : f32) outs(%empty_3 : !D_TYPE) -> !D_TYPE
24+
%2 = linalg.matmul ins(%lhs, %rhs : !A_TYPE, !A_TYPE)
25+
outs(%fill : !A_TYPE) -> !A_TYPE
26+
%3 = linalg.matmul ins(%2, %rhs_2 : !A_TYPE, !B_TYPE)
27+
outs(%fill_2 : !B_TYPE) -> !B_TYPE
28+
%4 = linalg.matmul ins(%lhs_2, %3 : !C_TYPE, !B_TYPE)
29+
outs(%fill_3 : !D_TYPE) -> !D_TYPE
30+
return %4 : !D_TYPE
31+
}

compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
302302
llvm::sys::path::append(npuInstPath,
303303
entryPointNamesFb[ordinal] + ".npu.txt");
304304

305-
// Convert ordinal to hexadecimal string for xclbin kern id
305+
// Convert ordinal to hexadecimal string for xclbin kernel id.
306306
std::stringstream ss;
307307
ss << "0x" << std::hex << ordinal + 10;
308308
std::string ordinalHex = ss.str();
@@ -370,7 +370,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
370370
int result = llvm::sys::ExecuteAndWait(cmdArgs[0], cmdArgs, cmdEnvRefs);
371371
if (result != 0 && AttemptingMerge) {
372372
// we failed to create xclbin but maybe we failed because we were trying
373-
// to merge the kerenel in exisiting kernel, try again to see if perhaps
373+
// to merge the kernel into the existing xclbin, try again to see if perhaps
374374
// we have success if we don't try to merge.
375375
AttemptingMerge = false;
376376
result =
@@ -400,18 +400,8 @@ LogicalResult AIETargetBackend::serializeExecutable(
400400
asmInstrIndices[ordinal] = asmInstrRefs.size();
401401
asmInstrRefs.push_back(
402402
iree_amd_aie_hal_xrt_AsmInstDef_create(builder, npuInstrsVec));
403-
/*
404-
xclbinIn = openInputFile(xclbinPath, &errorMessage);
405-
if (!xclbinIn) {
406-
moduleOp.emitOpError() << "Failed to open xclbin file: " << errorMessage;
407-
}
408-
auto xclbinStringRef = builder.createString(xclbinIn->getBuffer());
409-
xclbinIndices[ordinal] = xclbinRefs.size();
410-
xclbinRefs.push_back(
411-
iree_amd_aie_hal_xrt_XclbinDef_create(builder, xclbinStringRef));
412-
*/
413403
}
414-
// write out the final xclbins to flatbuffer
404+
// Write out the final xclbins to flatbuffer.
415405
for (auto xclbinPath : xclbinPaths) {
416406
llvm::outs() << "writing xclbin from path: " << xclbinPath << "\n";
417407
std::string errorMessage;
@@ -424,7 +414,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
424414
iree_amd_aie_hal_xrt_XclbinDef_create(builder, xclbinStringRef));
425415
}
426416

427-
// Serialize the executable to flatbuffer format
417+
// Serialize the executable to flatbuffer format.
428418
auto entryPointsRef = builder.createStringVec(entryPointNamesFb);
429419

430420
iree_amd_aie_hal_xrt_ExecutableDef_entry_points_add(builder, entryPointsRef);

0 commit comments

Comments
 (0)