Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NpuSyncOp generation to AIEDmaToNpu #1114

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,46 @@ struct DmaWaitToNpuPattern : OpConversionPattern<NpuDmaWaitOp> {
}
};

std::optional<AIE::ShimDMAAllocationOp>
getAllocOpForSymbol(SmallVector<AIE::ShimDMAAllocationOp> shimDmaAllocOps,
StringRef sym_name) {
for (auto shimDmaAllocOp : shimDmaAllocOps)
if (shimDmaAllocOp.getSymName() == sym_name)
return shimDmaAllocOp;
return std::nullopt;
}

/// Inserts an `AIEX::NpuSyncOp` in front of a function's `func.return` for
/// every `AIEX::NpuDmaMemcpyNdOp` in that function whose `metadata` symbol
/// resolves to a shim DMA allocation with channel direction S2MM.
///
/// All `AIE::ShimDMAAllocationOp`s in `device` are collected first; each
/// `func::FuncOp` in `device` is then processed independently. Functions that
/// contain no `func.return` are left untouched.
///
/// NOTE(review): one sync is emitted per matching memcpy, unconditionally. A
/// reviewer raised the concern that when N output tiles are collected at the
/// shim and only the last needs a sync, the N-1 extra syncs carry a
/// performance penalty compared with a single manually placed sync.
///
/// \param device the device op whose functions are rewritten in place.
void insertNpuSyncOpForResults(AIE::DeviceOp device) {
  SmallVector<AIE::ShimDMAAllocationOp> shimDmaAllocOps;
  device.walk([&](AIE::ShimDMAAllocationOp shimDmaAllocOp) {
    shimDmaAllocOps.push_back(shimDmaAllocOp);
  });

  device.walk([&](mlir::func::FuncOp f) {
    // If the function holds several returns, the last one visited by the walk
    // is used as the anchor.
    Operation *returnOp = nullptr;
    f.walk([&](mlir::func::ReturnOp op) { returnOp = op.getOperation(); });

    // Without a return op there is no insertion point; handing a null
    // operation to the builder would dereference it.
    if (!returnOp)
      return;

    // Snapshot the memcpy ops before creating anything, so the IR is not
    // mutated while it is being walked.
    SmallVector<AIEX::NpuDmaMemcpyNdOp> dmas;
    f.walk([&](AIEX::NpuDmaMemcpyNdOp dma) { dmas.push_back(dma); });

    // The builder inserts before `returnOp`, so the syncs end up in the same
    // order as the memcpy ops they belong to.
    OpBuilder builder(returnOp);
    for (auto dma : dmas) {
      auto infoOp = getAllocOpForSymbol(shimDmaAllocOps, dma.getMetadata());
      if (!infoOp || infoOp->getChannelDir() != AIE::DMAChannelDir::S2MM)
        continue;

      // Found dma op copying results to host
      auto col = builder.getI32IntegerAttr(infoOp->getCol());
      // NOTE(review): row 0 is presumably the shim row - confirm.
      auto row = builder.getI32IntegerAttr(0);
      // NOTE(review): direction 0 presumably encodes S2MM - confirm.
      auto dir = builder.getI32IntegerAttr(0);
      auto chan = builder.getI32IntegerAttr(infoOp->getChannelIndex());
      auto col_num = builder.getI32IntegerAttr(1);
      auto row_num = builder.getI32IntegerAttr(1);
      builder.create<AIEX::NpuSyncOp>(dma->getLoc(), col, row, dir, chan,
                                      col_num, row_num);
    }
  });
}

struct AIEDmaToNpuPass : AIEDmaToNpuBase<AIEDmaToNpuPass> {
void runOnOperation() override {

Expand All @@ -441,6 +481,9 @@ struct AIEDmaToNpuPass : AIEDmaToNpuBase<AIEDmaToNpuPass> {
patterns.insert<PushToNpuPattern>(&getContext(), cachingGetter);
patterns.insert<RtpToNpuPattern>(&getContext());

// Insert sync op after copying data out to host
insertNpuSyncOpForResults(device);

if (failed(applyPartialConversion(device, target, std::move(patterns))))
signalPassFailure();
}
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/dma_transpose/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def sequence(A, B, C):
npu_dma_memcpy_nd(
metadata="in", bd_id=1, mem=A, sizes=[1, K, M, 1], strides=[1, 1, K]
)
npu_sync(column=0, row=0, direction=0, channel=0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer that the tests keep the sync explicit, but using aiex.npu.dma_wait instead of aiex.npu.sync

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer that the tests keep the sync explicit, but using aiex.npu.dma_wait instead of aiex.npu.sync

Should we close this PR in favor of #1791? Or is there still a desire to insert the sync/wait automatically?


print(ctx.module)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,6 @@ def sequence(A, B, C):
strides=[0, 0, 0],
)

for i in range(n_cores):
npu_sync(column=i, row=0, direction=0, channel=0)

print(ctx.module)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ def sequence(A, B, C):
strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s],
)

npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,6 @@ def sequence(A, B, C):
sizes=[N_div_n_div_n_cols, K_div_k, k, n_in_i32s],
strides=[n_x_n_cols_in_i32s, k_x_N_in_i32s, N_in_i32s],
)
for i in range(n_cols):
npu_sync(column=i, row=0, direction=0, channel=0)

# print(ctx.module.operation.verify())
print(ctx.module)
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/matrix_scalar_add/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ def sequence(inTensor, notUsed, outTensor):
sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
strides=[1, 1, IMAGE_WIDTH],
)
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/passthrough_dmas/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def core_body():
def sequence(A, B, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/passthrough_kernel/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def sequence(inTensor, outTensor, notUsed):
mem=outTensor,
sizes=[1, 1, 1, tensorSizeInInt32s],
)
npu_sync(column=0, row=0, direction=0, channel=0)


try:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_exp/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def sequence(A, C):
npu_dma_memcpy_nd(
metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
)
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_reduce_add/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def core_body():
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_reduce_max/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def core_body():
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_reduce_min/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def core_body():
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_scalar_add/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ def sequence(inTensor, outTensor):
npu_dma_memcpy_nd(
metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, PROBLEM_SIZE]
)
npu_sync(column=0, row=0, direction=0, channel=0)


# Declares that subsequent code is in mlir-aie context
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_scalar_mul/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def sequence(A, F, C):
)
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N_in_i32s])
npu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1])
npu_sync(column=0, row=0, direction=0, channel=0)


try:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_vector_add/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def sequence(A, B, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_vector_modulo/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def sequence(A, B, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_vector_mul/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def sequence(A, B, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
2 changes: 0 additions & 2 deletions programming_examples/ml/bottleneck/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,8 +631,6 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
sizes=[1, 1, 1, totalWeightsSize32b],
)

npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)


Expand Down
1 change: 0 additions & 1 deletion programming_examples/ml/conv2d/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ def sequence(I, W, O):
mem=W,
sizes=[1, 1, 1, weightsInInt32s],
)
npu_sync(column=0, row=0, direction=0, channel=0)

# print(ctx.module.operation.verify())
print(ctx.module)
Expand Down
1 change: 0 additions & 1 deletion programming_examples/ml/conv2d_fused_relu/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,6 @@ def sequence(I, W, O):
mem=W,
sizes=[1, 1, 1, weightsInInt32s],
)
npu_sync(column=0, row=0, direction=0, channel=0)

# print(ctx.module.operation.verify())
print(ctx.module)
Expand Down
1 change: 0 additions & 1 deletion programming_examples/ml/eltwise_add/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ def sequence(A, B, C):
npu_dma_memcpy_nd(
metadata="inB", bd_id=2, mem=B, sizes=[1, 1, 1, B_sz_in_i32s]
)
npu_sync(column=0, row=0, direction=0, channel=0)


try:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/ml/eltwise_mul/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ def sequence(A, B, C):
npu_dma_memcpy_nd(
metadata="inB", bd_id=2, mem=B, sizes=[1, 1, 1, B_sz_in_i32s]
)
npu_sync(column=0, row=0, direction=0, channel=0)


try:
Expand Down
1 change: 0 additions & 1 deletion programming_examples/ml/relu/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ def sequence(A, C):
npu_dma_memcpy_nd(
metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
)
npu_sync(column=0, row=0, direction=0, channel=0)


try:
Expand Down
2 changes: 0 additions & 2 deletions programming_examples/ml/resnet/layers_conv2_x/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,8 +986,6 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
sizes=[1, 1, 1, totalWeightsSize32b_rest],
)

npu_sync(column=1, row=0, direction=0, channel=0)

res = ctx.module.operation.verify()
if res == True:
print(ctx.module)
Expand Down
1 change: 0 additions & 1 deletion programming_examples/ml/softmax/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ def sequence(A, C):
npu_dma_memcpy_nd(
metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
)
npu_sync(column=0, row=0, direction=0, channel=0)


try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,6 @@ def sequence(I, B, O):
mem=O,
sizes=[1, 1, 1, height * lineWidthInInt32s],
)
npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ def sequence(inTensor, notUsed, outTensor):
mem=outTensor,
sizes=[1, 1, 1, tensorSizeInInt32s],
)
npu_sync(column=0, row=0, direction=0, channel=0)

# print(ctx.module.operation.verify())
print(ctx.module)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,6 @@ def sequence(I, B, O):
mem=I,
sizes=[1, 1, 1, tensorSizeInInt32s],
)
npu_sync(column=0, row=0, direction=0, channel=0)

# print(ctx.module.operation.verify())
print(ctx.module)
Expand Down
1 change: 0 additions & 1 deletion programming_examples/vision/vision_passthrough/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ def sequence(inTensor, notUsed, outTensor):
mem=outTensor,
sizes=[1, 1, 1, tensorSizeInInt32s],
)
npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ module @passThroughLine_aie2 {
//dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words])
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<518400xi32>
aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<518400xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ module @passThroughLine_aie2 {
//dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words])
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @inOF, id = 1 : i64 } : memref<2073600xi32>
aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @outOF, id = 0 : i64 } : memref<2073600xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ module @passThroughLine_aie2 {
//dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words])
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<1152xi32>
aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<1152xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def sequence(inTensor, notUsed, outTensor):
npu_dma_memcpy_nd(
metadata="in", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 48]
)
npu_sync(column=0, row=0, direction=0, channel=0)

res = ctx.module.operation.verify()
if res == True:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def sequence(inTensor, notUsed, outTensor):
npu_dma_memcpy_nd(
metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 48]
)
npu_sync(column=0, row=0, direction=0, channel=0)

res = ctx.module.operation.verify()
if res == True:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def sequence(inTensor, notUsed, outTensor):
npu_dma_memcpy_nd(
metadata="in", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 48]
)
npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
1 change: 0 additions & 1 deletion programming_guide/section-4/section-4b/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def sequence(A, F, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 4096])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, 4096])
npu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1])
npu_sync(column=0, row=0, direction=0, channel=0)


with mlir_mod_ctx() as ctx:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ module {
func.func @bobsyouruncle(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 0 : i64, metadata = @this_just_creates_a_symbol_and_the_type_means_nothing_in} : memref<64xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 1 : i64, metadata = @this_just_creates_a_symbol_and_the_type_means_nothing_out} : memref<64xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
}
Expand Down
1 change: 0 additions & 1 deletion test/npu-xrt/add_314_using_dma_op/aie.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ module {
%c64_i64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}

Expand Down
1 change: 0 additions & 1 deletion test/npu-xrt/add_one_objFifo/aie.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ module {
%c64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32>
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<64xi32>
aiex.npu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 }
return
}
}
Expand Down
1 change: 0 additions & 1 deletion test/npu-xrt/add_one_two/aie1.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ module {
%c64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32>
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<64xi32>
aiex.npu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 }
return
}
}
Expand Down
1 change: 0 additions & 1 deletion test/npu-xrt/add_one_using_dma/aie.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ module {
%c64_i64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c64_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c64_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}

Expand Down
1 change: 0 additions & 1 deletion test/npu-xrt/cascade_flows/aie.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ module {
%c64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32>
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<64xi32>
aiex.npu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 }
return
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,6 @@ module {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 16]) {id = 0 : i64, metadata = @airMemcpyId4} : memref<16x16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 16, 16][0, 0, 16]) {id = 1 : i64, metadata = @airMemcpyId5} : memref<16x16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 16, 16][0, 0, 16]) {id = 2 : i64, metadata = @airMemcpyId12} : memref<16x16xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
} {sym_name = "segment_0"}
Expand Down
Loading
Loading