From f0a60bacc912f194596211f305d15973f5f3fa93 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 11:58:31 -0700 Subject: [PATCH 01/46] Adding flag to run command --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/reduction/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 8c41a9868e..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py index 03a25b90db..0a8c1112d8 100644 --- a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py index 3f04ed0f1f..eb5440e4cd 100644 --- a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 8b91d2e434..84a1f12db7 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # XFAIL: * diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index d7eae0bc31..d22c2848bf 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # XFAIL: * diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index 4fba84bb83..0ecb7adcd2 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # XFAIL: * From 21b5a0fdf443ce609b1a25f1fc3e9c8758360ae9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 12:26:24 -0700 Subject: [PATCH 02/46] Correcting the CHECK messsage --- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 84a1f12db7..37222b8a78 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -12,7 +12,7 @@ # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s -# XFAIL: * +# CHECK: PASS! from aie.dialects.aie import * from aie.dialects.aiex import * from aie.helpers.dialects.ext.scf import _for as range_ diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index d22c2848bf..c93e1b21c9 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -12,7 +12,7 @@ # RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s -# XFAIL: * +# CHECK: PASS! import numpy as np from aie.dialects.aie import * diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index 0ecb7adcd2..d0b0f53d36 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -12,7 +12,7 @@ # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s -# XFAIL: * +# CHECK: PASS! import numpy as np from aie.dialects.aie import * From f5d41ebdd84afd174db5a6b40f5927a8eda114a6 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 12:57:05 -0700 Subject: [PATCH 03/46] Verifying as programming example --- .../dyn_objFifo/nested_loops/Makefile | 66 +++++++++ .../dyn_objFifo/nested_loops/aie2.py | 73 +++++++++ .../dyn_objFifo/nested_loops/kernel.cc | 22 +++ .../dyn_objFifo/nested_loops/test.cpp | 139 ++++++++++++++++++ 4 files changed, 300 insertions(+) create mode 100644 programming_examples/dyn_objFifo/nested_loops/Makefile create mode 100644 programming_examples/dyn_objFifo/nested_loops/aie2.py create mode 100644 programming_examples/dyn_objFifo/nested_loops/kernel.cc create mode 100644 programming_examples/dyn_objFifo/nested_loops/test.cpp diff --git a/programming_examples/dyn_objFifo/nested_loops/Makefile b/programming_examples/dyn_objFifo/nested_loops/Makefile new file mode 100644 index 0000000000..0216ac75da --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. +XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp ${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/nested_loops/aie2.py b/programming_examples/dyn_objFifo/nested_loops/aie2.py new file mode 100644 index 0000000000..dee6558c3e --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/aie2.py @@ -0,0 +1,73 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 AMD Inc. + +# REQUIRES: ryzen_ai, valid_xchess_license +# +# RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o +# RUN: %python %S/aie2.py > ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: %run_on_npu ./test.exe | FileCheck %s +# CHECK: PASS! +import numpy as np + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.helpers.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 50 +O = 250 +n_rows = 5 +dev = AIEDevice.npu1_1col +col = 0 + + +def nested_loops(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + tensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty) + + # AIE Core Function declarations + passthrough_10_i32 = external_func( + "passthrough_10_i32", inputs=[tensor_ty, tensor_ty] + ) + + # Set up compute tiles + @core(ComputeTile, "kernel.o") + def core_body(): + for _ in range_(5): + elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + for _ in range_(5): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + passthrough_10_i32(elemIn, elemOut) + of_out.release(ObjectFifoPort.Produce, 1) + of_in.release(ObjectFifoPort.Consume, 1) + + # To/from AIE-array data movement + @runtime_sequence(tensor_ty, tensor_ty) + def sequence(A, C): + npu_dma_memcpy_nd( + metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True + ) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, O]) + dma_wait(of_in, of_out) + + print(ctx.module) + + +nested_loops() diff --git a/programming_examples/dyn_objFifo/nested_loops/kernel.cc b/programming_examples/dyn_objFifo/nested_loops/kernel.cc new file mode 100644 index 0000000000..d5a796add2 --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/kernel.cc @@ -0,0 +1,22 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include + +template +void passthrough(const T_in *__restrict in, T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = in[i]; + } +} + +extern "C" { + +void passthrough_10_i32(const int *__restrict in, int *__restrict out) { + passthrough(in, out); +} +} \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/nested_loops/test.cpp b/programming_examples/dyn_objFifo/nested_loops/test.cpp new file mode 100644 index 0000000000..ecd9a90d51 --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/test.cpp @@ -0,0 +1,139 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (50 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (250 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH; j++) { + buf_input[i * WIDTH + j] = i; + std::cout << buf_input[i * WIDTH + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + int expected_output = 0; + int five_repetitions = 0; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + if (five_repetitions == 5) { + expected_output++; + five_repetitions = 0; + } + for (int j = 0; j < WIDTH; j++) { + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * WIDTH + j] << std::endl; + pass &= buf_output[i * WIDTH + j] == expected_output; + } + std::cout << std::endl << std::endl; + five_repetitions++; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; + + return 0; +} \ No newline at end of file From 77513c8a9346feaa52c6e17528556624013ed778 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 13:48:24 -0700 Subject: [PATCH 04/46] Checking the flags that caused the issue --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index dee6558c3e..19945d46c2 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! From 29cf679c03f97883c335ea7a0e9cb4e660c62dac Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 14:24:53 -0700 Subject: [PATCH 05/46] Revert "Checking the flags that caused the issue" This reverts commit 77513c8a9346feaa52c6e17528556624013ed778. --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 19945d46c2..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! From 1db8fbf8bacc500fc004b4dd76bdd51672b94fcf Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 14:29:26 -0700 Subject: [PATCH 06/46] Maybe clang version --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index dee6558c3e..8f1146f34a 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,8 +10,8 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! import numpy as np From a3bd5192038d0f0b81a58a831b573ee62a7529fc Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 14:39:52 -0700 Subject: [PATCH 07/46] Revert "Maybe clang version" This reverts commit 1db8fbf8bacc500fc004b4dd76bdd51672b94fcf. --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 8f1146f34a..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,8 +10,8 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! import numpy as np From 8945f79d57cc38d5e3cda41ef35b9f865adccf2a Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 09:33:00 -0700 Subject: [PATCH 08/46] may be compiler --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index dee6558c3e..c390a745fd 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,7 +10,7 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: g++ %S/test.cpp -o test.exe -std=c++13 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! import numpy as np From 46b1a921e7ffca26a70d2f916a3dda0dc10e67f9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 09:47:56 -0700 Subject: [PATCH 09/46] one other check --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index c390a745fd..a1338bcf39 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,7 +10,7 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: g++ %S/test.cpp -o test.exe -std=c++13 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: g++-13 -ggdb %S/test.cpp -o test.exe -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! import numpy as np From 7fee57d1579bbbdd085da3a4f0f6dae8b5b8f983 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 10:42:07 -0700 Subject: [PATCH 10/46] Same run command for all --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index a1338bcf39..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,7 +10,7 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: g++-13 -ggdb %S/test.cpp -o test.exe -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! import numpy as np diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index c93e1b21c9..366552907b 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! From 60fc2918bcd0ffd3fabbf592a5c02c313dec97f7 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 11:41:15 -0700 Subject: [PATCH 11/46] change chess --- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 37222b8a78..c6dd53838d 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -5,7 +5,7 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, valid_xchess_license +# REQUIRES: ryzen_ai, chess # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index c93e1b21c9..366552907b 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index d0b0f53d36..f9539fecfe 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -5,7 +5,7 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, valid_xchess_license +# REQUIRES: ryzen_ai, chess # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir From da418086ff1f98c74cfea8b9178ade2e17882b87 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 11:42:41 -0700 Subject: [PATCH 12/46] Missed file --- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index 366552907b..1c8922df1b 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -5,7 +5,7 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, valid_xchess_license +# REQUIRES: ryzen_ai, chess # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir From d13e6076dadb314a129f4650dc08481ada0570a9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 12:10:19 -0700 Subject: [PATCH 13/46] Checking --- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index c6dd53838d..7baa366452 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -11,7 +11,7 @@ # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! from aie.dialects.aie import * from aie.dialects.aiex import * diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index 1c8922df1b..3f1159da13 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -11,7 +11,7 @@ # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! import numpy as np diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index f9539fecfe..3c53c21cd8 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -11,7 +11,7 @@ # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! import numpy as np From dd87d0b71544cb453dd0a5c3fda0cd5fe2389dc9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 14:38:08 -0700 Subject: [PATCH 14/46] Checking with working test file --- .../two_core_sliding_window/test.cpp | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp index 648924ac4f..ccf951d8dd 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "final.xclbin" +#define XCLBIN "build/final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "insts.txt" +#define INSTS_TXT "build/insts.txt" #endif #ifndef KERNEL_NAME @@ -33,9 +33,24 @@ #include "test_utils.h" +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + int main(int argc, const char *argv[]) { - std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); + std::vector instr_v = load_instr_sequence(INSTS_TXT); assert(instr_v.size() > 0); // Get a device handle From 30b7f84e25b139f386b97733adc06b51cab927c0 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 15:04:20 -0700 Subject: [PATCH 15/46] Missing:: --- .../sliding_window/test.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp index 648924ac4f..2ffbd6ba56 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "final.xclbin" +#define XCLBIN "build/final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "insts.txt" +#define INSTS_TXT "build/insts.txt" #endif #ifndef KERNEL_NAME @@ -33,6 +33,21 @@ #include "test_utils.h" +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + int main(int argc, const char *argv[]) { std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); From 8f4df8a5fe44dee60f570ed52ffb655954f67a43 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 16:07:31 -0700 Subject: [PATCH 16/46] Instr load problem --- .../dynamic_object_fifo/sliding_window_conditional/test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp index 648924ac4f..6668c96421 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp @@ -25,9 +25,10 @@ #define KERNEL_NAME "MLIR_AIE" #endif -#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE From 401a55b61d4121239b0b4750cb0fd677165dfb0a Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 16:10:36 -0700 Subject: [PATCH 17/46] format --- .../dynamic_object_fifo/sliding_window_conditional/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp index 6668c96421..c25d9358f6 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp @@ -25,7 +25,7 @@ #define KERNEL_NAME "MLIR_AIE" #endif -#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes #define WIDTH 10 From 2b6e8ec7c58cddf009781e80b2f8f85a85d16e6c Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 16:24:02 -0700 Subject: [PATCH 18/46] Rest of them --- .../sliding_window/test.cpp | 20 +++-------------- .../two_core_sliding_window/test.cpp | 22 ++++--------------- 2 files changed, 7 insertions(+), 35 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp index 2ffbd6ba56..c25d9358f6 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "build/final.xclbin" +#define XCLBIN "final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" +#define INSTS_TXT "insts.txt" #endif #ifndef KERNEL_NAME @@ -28,26 +28,12 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE #include "test_utils.h" -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} - int main(int argc, const char *argv[]) { std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp index ccf951d8dd..c25d9358f6 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "build/final.xclbin" +#define XCLBIN "final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" +#define INSTS_TXT "insts.txt" #endif #ifndef KERNEL_NAME @@ -28,29 +28,15 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE #include "test_utils.h" -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} - int main(int argc, const char *argv[]) { - std::vector instr_v = load_instr_sequence(INSTS_TXT); + std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); assert(instr_v.size() > 0); // Get a device handle From a8fabc07aaad4fea1f70af8db4c66be64706511b Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 17:00:46 -0700 Subject: [PATCH 19/46] Old test --- .../sliding_window/test.cpp | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp index c25d9358f6..5c78b0e986 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "final.xclbin" +#define XCLBIN "build/final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "insts.txt" +#define INSTS_TXT "build/insts.txt" #endif #ifndef KERNEL_NAME @@ -28,15 +28,27 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes -#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE -#include "test_utils.h" +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} int main(int argc, const char *argv[]) { - std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); + std::vector instr_v = load_instr_sequence(INSTS_TXT); assert(instr_v.size() > 0); // Get a device handle From 3b4f1afaf137e0f00d2bec02a60b4d3d5c84d18c Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Tue, 12 Nov 2024 13:25:00 -0700 Subject: [PATCH 20/46] Update makefile-common --- test/npu-xrt/makefile-common | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/test/npu-xrt/makefile-common b/test/npu-xrt/makefile-common index 51e9a19245..bdde6760b6 100644 --- a/test/npu-xrt/makefile-common +++ b/test/npu-xrt/makefile-common @@ -1,17 +1,19 @@ -# Contains common definitions used across the Makefiles of npu-xrt tests. - # VITIS related variables -VITIS_ROOT ?= $(shell realpath $(dir $(shell which vitis))/../) -VITIS_AIETOOLS_DIR ?= ${VITIS_ROOT}/aietools -VITIS_AIE_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/versal_prod/lib -VITIS_AIE2_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/aie_ml/lib +AIETOOLS_DIR ?= $(shell realpath $(dir $(shell which xchesscc))/../) +AIE_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/versal_prod/lib +AIE2_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/aie_ml/lib + +AIEOPT_DIR ?= $(shell realpath $(dir $(shell which aie-opt))/..) + +WARNING_FLAGS = -Wno-parentheses -Wno-attributes -Wno-macro-redefined -CHESSCC1_FLAGS = -f -p me -P ${VITIS_AIE_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include -CHESSCC2_FLAGS = -f -p me -P ${VITIS_AIE2_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 -CHESS_FLAGS = -P ${VITIS_AIE_INCLUDE_DIR} +CHESSCC1_FLAGS = -f -p me -P ${AIE_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include +CHESSCC2_FLAGS = -f -p me -P ${AIE2_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 +CHESS_FLAGS = -P ${AIE_INCLUDE_DIR} -CHESSCCWRAP1_FLAGS = aie -I ${VITIS_AIETOOLS_DIR}/include -CHESSCCWRAP2_FLAGS = aie2 -I ${VITIS_AIETOOLS_DIR}/include +CHESSCCWRAP1_FLAGS = aie -I ${AIETOOLS_DIR}/include +CHESSCCWRAP2_FLAGS = aie2 -I ${AIETOOLS_DIR}/include +PEANOWRAP2_FLAGS = -O2 -v -std=c++20 --target=aie2-none-unknown-elf ${WARNING_FLAGS} -DNDEBUG -I ${AIEOPT_DIR}/include TEST_POWERSHELL := $(shell command -v powershell.exe >/dev/null 2>&1 && echo yes || echo no) ifeq ($(TEST_POWERSHELL),yes) From dced95e6e71ce1b3b1984ae97194372ecf50338a Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Wed, 13 Nov 2024 13:19:27 -0700 Subject: [PATCH 21/46] Revert "Update makefile-common" This reverts commit 3b4f1afaf137e0f00d2bec02a60b4d3d5c84d18c. --- test/npu-xrt/makefile-common | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/test/npu-xrt/makefile-common b/test/npu-xrt/makefile-common index bdde6760b6..51e9a19245 100644 --- a/test/npu-xrt/makefile-common +++ b/test/npu-xrt/makefile-common @@ -1,19 +1,17 @@ -# VITIS related variables -AIETOOLS_DIR ?= $(shell realpath $(dir $(shell which xchesscc))/../) -AIE_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/versal_prod/lib -AIE2_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/aie_ml/lib - -AIEOPT_DIR ?= $(shell realpath $(dir $(shell which aie-opt))/..) +# Contains common definitions used across the Makefiles of npu-xrt tests. -WARNING_FLAGS = -Wno-parentheses -Wno-attributes -Wno-macro-redefined +# VITIS related variables +VITIS_ROOT ?= $(shell realpath $(dir $(shell which vitis))/../) +VITIS_AIETOOLS_DIR ?= ${VITIS_ROOT}/aietools +VITIS_AIE_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/versal_prod/lib +VITIS_AIE2_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/aie_ml/lib -CHESSCC1_FLAGS = -f -p me -P ${AIE_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include -CHESSCC2_FLAGS = -f -p me -P ${AIE2_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 -CHESS_FLAGS = -P ${AIE_INCLUDE_DIR} +CHESSCC1_FLAGS = -f -p me -P ${VITIS_AIE_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include +CHESSCC2_FLAGS = -f -p me -P ${VITIS_AIE2_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 +CHESS_FLAGS = -P ${VITIS_AIE_INCLUDE_DIR} -CHESSCCWRAP1_FLAGS = aie -I ${AIETOOLS_DIR}/include -CHESSCCWRAP2_FLAGS = aie2 -I ${AIETOOLS_DIR}/include -PEANOWRAP2_FLAGS = -O2 -v -std=c++20 --target=aie2-none-unknown-elf ${WARNING_FLAGS} -DNDEBUG -I ${AIEOPT_DIR}/include +CHESSCCWRAP1_FLAGS = aie -I ${VITIS_AIETOOLS_DIR}/include +CHESSCCWRAP2_FLAGS = aie2 -I ${VITIS_AIETOOLS_DIR}/include TEST_POWERSHELL := $(shell command -v powershell.exe >/dev/null 2>&1 && echo yes || echo no) ifeq ($(TEST_POWERSHELL),yes) From 98f8b0d2cd78666e7f4aef2a6712a6b2347ec708 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 13 Nov 2024 19:32:06 -0700 Subject: [PATCH 22/46] Testing sliding window test cases as examples --- .../dyn_objFifo/sliding_window/Makefile | 66 +++++++++ .../dyn_objFifo/sliding_window/aie2.py | 76 ++++++++++ .../dyn_objFifo/sliding_window/kernel.cc | 24 +++ .../dyn_objFifo/sliding_window/test.cpp | 138 ++++++++++++++++++ .../sliding_window_conditional/Makefile | 66 +++++++++ .../sliding_window_conditional/aie2.py | 74 ++++++++++ .../sliding_window_conditional/kernel.cc | 24 +++ .../sliding_window_conditional/test.cpp | 138 ++++++++++++++++++ .../two_core_sliding_window/Makefile | 66 +++++++++ .../two_core_sliding_window/aie2.py | 90 ++++++++++++ .../two_core_sliding_window/kernel.cc | 38 +++++ .../two_core_sliding_window/test.cpp | 138 ++++++++++++++++++ 12 files changed, 938 insertions(+) create mode 100644 programming_examples/dyn_objFifo/sliding_window/Makefile create mode 100644 programming_examples/dyn_objFifo/sliding_window/aie2.py create mode 100644 programming_examples/dyn_objFifo/sliding_window/kernel.cc create mode 100644 programming_examples/dyn_objFifo/sliding_window/test.cpp create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/Makefile create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/Makefile create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp diff --git a/programming_examples/dyn_objFifo/sliding_window/Makefile b/programming_examples/dyn_objFifo/sliding_window/Makefile new file mode 100644 index 0000000000..0216ac75da --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. +XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp ${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window/aie2.py b/programming_examples/dyn_objFifo/sliding_window/aie2.py new file mode 100644 index 0000000000..08d92c73e1 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/aie2.py @@ -0,0 +1,76 @@ +# dynamic_object_fifo/sliding_window/aie2.py -*- Python -*- +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 100 +n_rows = 10 +dev = AIEDevice.npu1_1col +col = 0 + + +def sliding_window(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(N // n_rows, T.i32()) + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + + # AIE Core Function declarations + add_10_i32 = external_func( + "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + ) + + # Set up compute tiles + + @core(ComputeTile, "kernel.o") + def core_body(): + elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) + elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) + call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + of_out.release(ObjectFifoPort.Produce, 1) + + for _ in range_(8): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + of_in.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) + + elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + of_in.release(ObjectFifoPort.Consume, 2) + of_out.release(ObjectFifoPort.Produce, 1) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @runtime_sequence(tensor_ty, tensor_ty) + def sequence(A, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +sliding_window() diff --git a/programming_examples/dyn_objFifo/sliding_window/kernel.cc b/programming_examples/dyn_objFifo/sliding_window/kernel.cc new file mode 100644 index 0000000000..ddb474e102 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/kernel.cc @@ -0,0 +1,24 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include + +template +void add(const T_in *__restrict inA, const T_in *__restrict inB, + T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = inA[i] + inB[i]; + } +} + +extern "C" { + +void add_10_i32(const int *__restrict inA, const int *__restrict inB, + int *__restrict out) { + add(inA, inB, out); +} +} diff --git a/programming_examples/dyn_objFifo/sliding_window/test.cpp b/programming_examples/dyn_objFifo/sliding_window/test.cpp new file mode 100644 index 0000000000..3cd72ab880 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/test.cpp @@ -0,0 +1,138 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { + buf_input[i * INPUT_ROWS + j] = i; + std::cout << buf_input[i * INPUT_ROWS + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { + int expected_output = 0; + if (i == 0) { + expected_output = buf_input[i * INPUT_ROWS] * 2; + } else { + expected_output = + buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; + } + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; + pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; + } + std::cout << std::endl << std::endl; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; + + return 0; +} diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile b/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile new file mode 100644 index 0000000000..0216ac75da --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. +XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp ${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py new file mode 100644 index 0000000000..8ab2dfa636 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py @@ -0,0 +1,74 @@ +# dynamic_object_fifo/sliding_window/aie2.py -*- Python -*- +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 100 +n_rows = 10 +dev = AIEDevice.npu1_1col +col = 0 + + +def sliding_window(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(N // n_rows, T.i32()) + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + + # AIE Core Function declarations + add_10_i32 = external_func( + "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + ) + + # Set up compute tiles + + @core(ComputeTile, "kernel.o") + def core_body(): + for i in range_(10): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + if i == 0: + elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) + call(add_10_i32, [elemInPre, elemInPre, elemOut]) + elif i == 9: + elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOut]) + of_in.release(ObjectFifoPort.Consume, 2) + else: + elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + of_in.release(ObjectFifoPort.Consume, 1) + + of_out.release(ObjectFifoPort.Produce, 1) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @runtime_sequence(tensor_ty, tensor_ty) + def sequence(A, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +sliding_window() diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc b/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc new file mode 100644 index 0000000000..ddb474e102 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc @@ -0,0 +1,24 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include + +template +void add(const T_in *__restrict inA, const T_in *__restrict inB, + T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = inA[i] + inB[i]; + } +} + +extern "C" { + +void add_10_i32(const int *__restrict inA, const int *__restrict inB, + int *__restrict out) { + add(inA, inB, out); +} +} diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp b/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp new file mode 100644 index 0000000000..3cd72ab880 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp @@ -0,0 +1,138 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { + buf_input[i * INPUT_ROWS + j] = i; + std::cout << buf_input[i * INPUT_ROWS + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { + int expected_output = 0; + if (i == 0) { + expected_output = buf_input[i * INPUT_ROWS] * 2; + } else { + expected_output = + buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; + } + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; + pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; + } + std::cout << std::endl << std::endl; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; + + return 0; +} diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile b/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile new file mode 100644 index 0000000000..4e423e1df1 --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. +XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp ${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py new file mode 100644 index 0000000000..e815fada7c --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py @@ -0,0 +1,90 @@ +# dynamic_object_fifo/two_core_sliding_window/aie2.py -*- Python -*- +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 100 +n_rows = 10 +dev = AIEDevice.npu1_1col +col = 0 + + +def two_core_sliding_window(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(N // n_rows, T.i32()) + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + ComputeTile2 = tile(col, 4) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty) + of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, memRef_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) + + # AIE Core Function declarations + passthrough_10_i32 = external_func( + "passthrough_10_i32", inputs=[memRef_ty, memRef_ty] + ) + add_10_i32 = external_func( + "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + ) + + # Set up compute tiles + + @core(ComputeTile, "kernel.o") + def core_body(): + for _ in range_(10): + elemOut = of_in2.acquire(ObjectFifoPort.Produce, 1) + elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + call(passthrough_10_i32, [elemIn, elemOut]) + of_in.release(ObjectFifoPort.Consume, 1) + of_in2.release(ObjectFifoPort.Produce, 1) + + @core(ComputeTile2, "kernel.o") + def core_body(): + elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) + elemInPre = of_in2.acquire(ObjectFifoPort.Consume, 1) + call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + of_out.release(ObjectFifoPort.Produce, 1) + + for _ in range_(8): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsIn = of_in2.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + of_in2.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) + + elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsInPost = of_in2.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + of_in2.release(ObjectFifoPort.Consume, 2) + of_out.release(ObjectFifoPort.Produce, 1) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @runtime_sequence(tensor_ty, tensor_ty) + def sequence(A, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +two_core_sliding_window() diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc b/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc new file mode 100644 index 0000000000..7e4515193c --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc @@ -0,0 +1,38 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include + +template +void passthrough(const T_in *__restrict in, T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = in[i]; + } +} + +extern "C" { + +void passthrough_10_i32(const int *__restrict in, int *__restrict out) { + passthrough(in, out); +} +} + +template +void add(const T_in *__restrict inA, const T_in *__restrict inB, + T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = inA[i] + inB[i]; + } +} + +extern "C" { + +void add_10_i32(const int *__restrict inA, const int *__restrict inB, + int *__restrict out) { + add(inA, inB, out); +} +} diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp b/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp new file mode 100644 index 0000000000..3cd72ab880 --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp @@ -0,0 +1,138 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { + buf_input[i * INPUT_ROWS + j] = i; + std::cout << buf_input[i * INPUT_ROWS + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { + int expected_output = 0; + if (i == 0) { + expected_output = buf_input[i * INPUT_ROWS] * 2; + } else { + expected_output = + buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; + } + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; + pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; + } + std::cout << std::endl << std::endl; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; + + return 0; +} From 130211597211f7db93c7c9475bd1e0f4867793fb Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 13 Nov 2024 19:54:55 -0700 Subject: [PATCH 23/46] Updated aie2.py with the latest python bindings --- .../dyn_objFifo/sliding_window/aie2.py | 4 +-- .../sliding_window_conditional/aie2.py | 27 ++++++++-------- .../two_core_sliding_window/aie2.py | 32 +++++++++---------- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/programming_examples/dyn_objFifo/sliding_window/aie2.py b/programming_examples/dyn_objFifo/sliding_window/aie2.py index 08d92c73e1..57d5efb1a5 100644 --- a/programming_examples/dyn_objFifo/sliding_window/aie2.py +++ b/programming_examples/dyn_objFifo/sliding_window/aie2.py @@ -6,11 +6,9 @@ # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -import sys - from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.extras.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.ext.scf import _for as range_ from aie.extras.context import mlir_mod_ctx N = 100 diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py index 8ab2dfa636..83719bc8e8 100644 --- a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py @@ -6,11 +6,11 @@ # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.extras.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.ext.scf import _for as range_ from aie.extras.context import mlir_mod_ctx N = 100 @@ -24,49 +24,48 @@ def sliding_window(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty) # AIE Core Function declarations add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] ) # Set up compute tiles - @core(ComputeTile, "kernel.o") def core_body(): for i in range_(10): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) if i == 0: elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOut]) + add_10_i32(elemInPre, elemInPre, elemOut) elif i == 9: elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOut]) + add_10_i32(elemsInPost[0], elemsInPost[1], elemOut) of_in.release(ObjectFifoPort.Consume, 2) else: elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_out) print(ctx.module) diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py index e815fada7c..c0d7c805ee 100644 --- a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py @@ -6,11 +6,11 @@ # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.extras.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.ext.scf import _for as range_ from aie.extras.context import mlir_mod_ctx N = 100 @@ -24,7 +24,7 @@ def two_core_sliding_window(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) @@ -32,16 +32,16 @@ def device_body(): ComputeTile2 = tile(col, 4) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty) - of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 2, subtensor_ty) + of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, subtensor_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, subtensor_ty) # AIE Core Function declarations passthrough_10_i32 = external_func( - "passthrough_10_i32", inputs=[memRef_ty, memRef_ty] + "passthrough_10_i32", inputs=[subtensor_ty, subtensor_ty] ) add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] ) # Set up compute tiles @@ -51,7 +51,7 @@ def core_body(): for _ in range_(10): elemOut = of_in2.acquire(ObjectFifoPort.Produce, 1) elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - call(passthrough_10_i32, [elemIn, elemOut]) + passthrough_10_i32(elemIn, elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_in2.release(ObjectFifoPort.Produce, 1) @@ -59,30 +59,30 @@ def core_body(): def core_body(): elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) elemInPre = of_in2.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + add_10_i32(elemInPre, elemInPre, elemOutPre) of_out.release(ObjectFifoPort.Produce, 1) for _ in range_(8): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) elemsIn = of_in2.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in2.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) elemsInPost = of_in2.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + add_10_i32(elemsInPost[0], elemsInPost[1], elemOutPost) of_in2.release(ObjectFifoPort.Consume, 2) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_out) print(ctx.module) From 57237c981cce19345e615cc83f424a13e479c123 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Thu, 21 Nov 2024 10:29:00 -0700 Subject: [PATCH 24/46] Lit file --- programming_examples/dyn_objFifo/lit.local.cfg | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 programming_examples/dyn_objFifo/lit.local.cfg diff --git a/programming_examples/dyn_objFifo/lit.local.cfg b/programming_examples/dyn_objFifo/lit.local.cfg new file mode 100644 index 0000000000..64cca87fdf --- /dev/null +++ b/programming_examples/dyn_objFifo/lit.local.cfg @@ -0,0 +1,11 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2023 AMD Inc. + +config.suffixes = ['.lit'] + +if 'AIE2' not in config.vitis_components: + config.unsupported = True From 5072fe5de4080525727ed229efad59d953393679 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 09:43:56 -0700 Subject: [PATCH 25/46] Finding the problem #Attempt 1 --- .../dyn_objFifo/nested_loops/Makefile | 10 +++--- .../dyn_objFifo/nested_loops/aie.mlir | 36 +++++++++++++++++++ .../dyn_objFifo/nested_loops/run_makefile.lit | 9 +++++ 3 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 programming_examples/dyn_objFifo/nested_loops/aie.mlir create mode 100644 programming_examples/dyn_objFifo/nested_loops/run_makefile.lit diff --git a/programming_examples/dyn_objFifo/nested_loops/Makefile b/programming_examples/dyn_objFifo/nested_loops/Makefile index 0216ac75da..7a3b9545a5 100644 --- a/programming_examples/dyn_objFifo/nested_loops/Makefile +++ b/programming_examples/dyn_objFifo/nested_loops/Makefile @@ -35,18 +35,18 @@ host_target?=build/test .PHONY: all all: ${xclbin_target} ${host_target} -build/aie.mlir: ${srcdir}/aie2.py - mkdir -p ${@D} - python3 $< > $@ +# build/aie.mlir: ${srcdir}/aie2.py +# mkdir -p ${@D} +# python3 $< > $@ build/kernel.o: ${srcdir}/kernel.cc mkdir -p ${@D} cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} -${xclbin_target}: build/aie.mlir build/kernel.o +${xclbin_target}: ${srcdir}/aie.mlir build/kernel.o mkdir -p ${@D} cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${srcdir}/${<:%=../%} ${host_target}: ${srcdir}/test.cpp ${xclbin_target} mkdir -p ${@D} diff --git a/programming_examples/dyn_objFifo/nested_loops/aie.mlir b/programming_examples/dyn_objFifo/nested_loops/aie.mlir new file mode 100644 index 0000000000..1fb0cda89c --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/aie.mlir @@ -0,0 +1,36 @@ +module { + aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + aie.objectfifo @in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> + func.func private @passthrough_10_i32(memref<10xi32>, memref<10xi32>) + %core_0_2 = aie.core(%tile_0_2) { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5 : index + %c1 = arith.constant 1 : index + scf.for %arg0 = %c0 to %c5 step %c1 { + %0 = aie.objectfifo.acquire @in(Consume, 1) : !aie.objectfifosubview> + %1 = aie.objectfifo.subview.access %0[0] : !aie.objectfifosubview> -> memref<10xi32> + %c0_0 = arith.constant 0 : index + %c5_1 = arith.constant 5 : index + %c1_2 = arith.constant 1 : index + scf.for %arg1 = %c0_0 to %c5_1 step %c1_2 { + %2 = aie.objectfifo.acquire @out(Produce, 1) : !aie.objectfifosubview> + %3 = aie.objectfifo.subview.access %2[0] : !aie.objectfifosubview> -> memref<10xi32> + func.call @passthrough_10_i32(%1, %3) : (memref<10xi32>, memref<10xi32>) -> () + aie.objectfifo.release @out(Produce, 1) + } + aie.objectfifo.release @in(Consume, 1) + } + aie.end + } {link_with = "kernel.o"} + aiex.runtime_sequence(%arg0: memref<10xi32>, %arg1: memref<10xi32>) { + aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 50][0, 0, 0, 1]) {id = 1 : i64, issue_token = true, metadata = @in} : memref<10xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 250][0, 0, 0, 1]) {id = 0 : i64, metadata = @out} : memref<10xi32> + aiex.npu.dma_wait {symbol = @in} + aiex.npu.dma_wait {symbol = @out} + } + } +} + diff --git a/programming_examples/dyn_objFifo/nested_loops/run_makefile.lit b/programming_examples/dyn_objFifo/nested_loops/run_makefile.lit new file mode 100644 index 0000000000..507b70720a --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, xchess +// +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile +// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s +// CHECK: PASS! \ No newline at end of file From 624a961c1a5c3310f52c151010bc9b287541cdf2 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 12:48:13 -0700 Subject: [PATCH 26/46] Second attempt --- .../dyn_objFifo/sliding_window/run_makefile.lit | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 programming_examples/dyn_objFifo/sliding_window/run_makefile.lit diff --git a/programming_examples/dyn_objFifo/sliding_window/run_makefile.lit b/programming_examples/dyn_objFifo/sliding_window/run_makefile.lit new file mode 100644 index 0000000000..507b70720a --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, xchess +// +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile +// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s +// CHECK: PASS! \ No newline at end of file From f01462390a331a87e4dbecb51c276260472c9628 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 13:48:15 -0700 Subject: [PATCH 27/46] Python to mlir --- .../dynamic_object_fifo/nested_loops/aie2.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index dee6558c3e..c13fba401e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -7,12 +7,28 @@ # REQUIRES: ryzen_ai, valid_xchess_license # -# RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o -# RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s -# CHECK: PASS! +# RUN: %python %s | FileCheck %s +#CHECK: %core_0_2 = aie.core(%tile_0_2) { +#CHECK: %c0 = arith.constant 0 : index +#CHECK: %c5 = arith.constant 5 : index +#CHECK: %c1 = arith.constant 1 : index +#CHECK: scf.for %arg0 = %c0 to %c5 step %c1 { +#CHECK: %0 = aie.objectfifo.acquire @in(Consume, 1) : !aie.objectfifosubview> +#CHECK: %1 = aie.objectfifo.subview.access %0[0] : !aie.objectfifosubview> -> memref<10xi32> +#CHECK: %c0_0 = arith.constant 0 : index +#CHECK: %c5_1 = arith.constant 5 : index +#CHECK: %c1_2 = arith.constant 1 : index +#CHECK: scf.for %arg1 = %c0_0 to %c5_1 step %c1_2 { +#CHECK: %2 = aie.objectfifo.acquire @out(Produce, 1) : !aie.objectfifosubview> +#CHECK: %3 = aie.objectfifo.subview.access %2[0] : !aie.objectfifosubview> -> memref<10xi32> +#CHECK: func.call @passthrough_10_i32(%1, %3) : (memref<10xi32>, memref<10xi32>) -> () +#CHECK: aie.objectfifo.release @out(Produce, 1) +#CHECK: } +#CHECK: aie.objectfifo.release @in(Consume, 1) +#CHECK: } +#CHECK: aie.end +#CHECK: } {link_with = "kernel.o"} + import numpy as np from aie.dialects.aie import * From 9f090a97631a4c4b4c3f36d597caa9bebb7410a3 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 13:58:36 -0700 Subject: [PATCH 28/46] aie-opt with dynamic object fifo flag lowering --- .../dynamic_object_fifo/nested_loops/aie2.py | 98 +++++++++++++++---- 1 file changed, 77 insertions(+), 21 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index c13fba401e..612a4d85c3 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -7,27 +7,83 @@ # REQUIRES: ryzen_ai, valid_xchess_license # -# RUN: %python %s | FileCheck %s -#CHECK: %core_0_2 = aie.core(%tile_0_2) { -#CHECK: %c0 = arith.constant 0 : index -#CHECK: %c5 = arith.constant 5 : index -#CHECK: %c1 = arith.constant 1 : index -#CHECK: scf.for %arg0 = %c0 to %c5 step %c1 { -#CHECK: %0 = aie.objectfifo.acquire @in(Consume, 1) : !aie.objectfifosubview> -#CHECK: %1 = aie.objectfifo.subview.access %0[0] : !aie.objectfifosubview> -> memref<10xi32> -#CHECK: %c0_0 = arith.constant 0 : index -#CHECK: %c5_1 = arith.constant 5 : index -#CHECK: %c1_2 = arith.constant 1 : index -#CHECK: scf.for %arg1 = %c0_0 to %c5_1 step %c1_2 { -#CHECK: %2 = aie.objectfifo.acquire @out(Produce, 1) : !aie.objectfifosubview> -#CHECK: %3 = aie.objectfifo.subview.access %2[0] : !aie.objectfifosubview> -> memref<10xi32> -#CHECK: func.call @passthrough_10_i32(%1, %3) : (memref<10xi32>, memref<10xi32>) -> () -#CHECK: aie.objectfifo.release @out(Produce, 1) -#CHECK: } -#CHECK: aie.objectfifo.release @in(Consume, 1) -#CHECK: } -#CHECK: aie.end -#CHECK: } {link_with = "kernel.o"} +# RUN: %python %S/aie2.py > ./aie2.mlir +# RUN: aie-opt --aie-objectFifo-stateful-transform=dynamic-objFifos ./aie2.mlir | FileCheck %s +# CHECK: %tile_0_0 = aie.tile(0, 0) +# CHECK: %tile_0_2 = aie.tile(0, 2) +# CHECK: %out_cons_prod_lock = aie.lock(%tile_0_0, 2) {init = 1 : i32, sym_name = "out_cons_prod_lock"} +# CHECK: %out_cons_cons_lock = aie.lock(%tile_0_0, 3) {init = 0 : i32, sym_name = "out_cons_cons_lock"} +# CHECK: %out_buff_0 = aie.buffer(%tile_0_2) {sym_name = "out_buff_0"} : memref<10xi32> +# CHECK: %out_buff_1 = aie.buffer(%tile_0_2) {sym_name = "out_buff_1"} : memref<10xi32> +# CHECK: %out_prod_lock = aie.lock(%tile_0_2, 2) {init = 2 : i32, sym_name = "out_prod_lock"} +# CHECK: %out_cons_lock = aie.lock(%tile_0_2, 3) {init = 0 : i32, sym_name = "out_cons_lock"} +# CHECK: %in_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "in_cons_buff_0"} : memref<10xi32> +# CHECK: %in_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "in_cons_buff_1"} : memref<10xi32> +# CHECK: %in_cons_prod_lock = aie.lock(%tile_0_2, 0) {init = 2 : i32, sym_name = "in_cons_prod_lock"} +# CHECK: %in_cons_cons_lock = aie.lock(%tile_0_2, 1) {init = 0 : i32, sym_name = "in_cons_cons_lock"} +# CHECK: %in_prod_lock = aie.lock(%tile_0_0, 0) {init = 1 : i32, sym_name = "in_prod_lock"} +# CHECK: %in_cons_lock = aie.lock(%tile_0_0, 1) {init = 0 : i32, sym_name = "in_cons_lock"} +# CHECK: aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0) +# CHECK: aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0) +# CHECK: func.func private @passthrough_10_i32(memref<10xi32>, memref<10xi32>) +# CHECK: %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xindex> +# CHECK: %core_0_2 = aie.core(%tile_0_2) { +# CHECK: %c0 = arith.constant 0 : index +# CHECK: %c0_0 = arith.constant 0 : index +# CHECK: %c2 = arith.constant 2 : index +# CHECK: memref.store %c0, %buffer_0_2[%c0_0] : memref<2xindex> +# CHECK: %c1 = arith.constant 1 : index +# CHECK: %c2_1 = arith.constant 2 : index +# CHECK: memref.store %c0, %buffer_0_2[%c1] : memref<2xindex> +# CHECK: %c0_2 = arith.constant 0 : index +# CHECK: %c5 = arith.constant 5 : index +# CHECK: %c1_3 = arith.constant 1 : index +# CHECK: scf.for %arg0 = %c0_2 to %c5 step %c1_3 { +# CHECK: aie.use_lock(%in_cons_cons_lock, AcquireGreaterEqual, 1) +# CHECK: %0 = memref.load %buffer_0_2[%c1] : memref<2xindex> +# CHECK: %1 = scf.index_switch %0 -> memref<10xi32> +# CHECK: case 0 { +# CHECK: scf.yield %in_cons_buff_0 : memref<10xi32> +# CHECK: } +# CHECK: case 1 { +# CHECK: scf.yield %in_cons_buff_1 : memref<10xi32> +# CHECK: } +# CHECK: default { +# CHECK: scf.yield %in_cons_buff_0 : memref<10xi32> +# CHECK: } +# CHECK: %c0_4 = arith.constant 0 : index +# CHECK: %c5_5 = arith.constant 5 : index +# CHECK: %c1_6 = arith.constant 1 : index +# CHECK: scf.for %arg1 = %c0_4 to %c5_5 step %c1_6 { +# CHECK: aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1) +# CHECK: %5 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> +# CHECK: %6 = scf.index_switch %5 -> memref<10xi32> +# CHECK: case 0 { +# CHECK: scf.yield %out_buff_0 : memref<10xi32> +# CHECK: } +# CHECK: case 1 { +# CHECK: scf.yield %out_buff_1 : memref<10xi32> +# CHECK: } +# CHECK: default { +# CHECK: scf.yield %out_buff_0 : memref<10xi32> +# CHECK: } +# CHECK: func.call @passthrough_10_i32(%1, %6) : (memref<10xi32>, memref<10xi32>) -> () +# CHECK: aie.use_lock(%out_cons_lock, Release, 1) +# CHECK: %7 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> +# CHECK: %c1_8 = arith.constant 1 : index +# CHECK: %8 = arith.addi %7, %c1_8 : index +# CHECK: %9 = arith.remsi %8, %c2 : index +# CHECK: memref.store %9, %buffer_0_2[%c0_0] : memref<2xindex> +# CHECK: } +# CHECK: aie.use_lock(%in_cons_prod_lock, Release, 1) +# CHECK: %2 = memref.load %buffer_0_2[%c1] : memref<2xindex> +# CHECK: %c1_7 = arith.constant 1 : index +# CHECK: %3 = arith.addi %2, %c1_7 : index +# CHECK: %4 = arith.remsi %3, %c2_1 : index +# CHECK: memref.store %4, %buffer_0_2[%c1] : memref<2xindex> +# CHECK: } +# CHECK: aie.end +# CHECK: } {link_with = "kernel.o"} import numpy as np From bb43c9592e967b3e7b834c037985a38c9973fb1c Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 15:08:31 -0700 Subject: [PATCH 29/46] Removing core to see the problem --- .../dynamic_object_fifo/nested_loops/aie2.py | 91 +------------------ 1 file changed, 5 insertions(+), 86 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 612a4d85c3..68ca1bce14 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -7,83 +7,12 @@ # REQUIRES: ryzen_ai, valid_xchess_license # +# RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: aie-opt --aie-objectFifo-stateful-transform=dynamic-objFifos ./aie2.mlir | FileCheck %s -# CHECK: %tile_0_0 = aie.tile(0, 0) -# CHECK: %tile_0_2 = aie.tile(0, 2) -# CHECK: %out_cons_prod_lock = aie.lock(%tile_0_0, 2) {init = 1 : i32, sym_name = "out_cons_prod_lock"} -# CHECK: %out_cons_cons_lock = aie.lock(%tile_0_0, 3) {init = 0 : i32, sym_name = "out_cons_cons_lock"} -# CHECK: %out_buff_0 = aie.buffer(%tile_0_2) {sym_name = "out_buff_0"} : memref<10xi32> -# CHECK: %out_buff_1 = aie.buffer(%tile_0_2) {sym_name = "out_buff_1"} : memref<10xi32> -# CHECK: %out_prod_lock = aie.lock(%tile_0_2, 2) {init = 2 : i32, sym_name = "out_prod_lock"} -# CHECK: %out_cons_lock = aie.lock(%tile_0_2, 3) {init = 0 : i32, sym_name = "out_cons_lock"} -# CHECK: %in_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "in_cons_buff_0"} : memref<10xi32> -# CHECK: %in_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "in_cons_buff_1"} : memref<10xi32> -# CHECK: %in_cons_prod_lock = aie.lock(%tile_0_2, 0) {init = 2 : i32, sym_name = "in_cons_prod_lock"} -# CHECK: %in_cons_cons_lock = aie.lock(%tile_0_2, 1) {init = 0 : i32, sym_name = "in_cons_cons_lock"} -# CHECK: %in_prod_lock = aie.lock(%tile_0_0, 0) {init = 1 : i32, sym_name = "in_prod_lock"} -# CHECK: %in_cons_lock = aie.lock(%tile_0_0, 1) {init = 0 : i32, sym_name = "in_cons_lock"} -# CHECK: aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0) -# CHECK: aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0) -# CHECK: func.func private @passthrough_10_i32(memref<10xi32>, memref<10xi32>) -# CHECK: %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xindex> -# CHECK: %core_0_2 = aie.core(%tile_0_2) { -# CHECK: %c0 = arith.constant 0 : index -# CHECK: %c0_0 = arith.constant 0 : index -# CHECK: %c2 = arith.constant 2 : index -# CHECK: memref.store %c0, %buffer_0_2[%c0_0] : memref<2xindex> -# CHECK: %c1 = arith.constant 1 : index -# CHECK: %c2_1 = arith.constant 2 : index -# CHECK: memref.store %c0, %buffer_0_2[%c1] : memref<2xindex> -# CHECK: %c0_2 = arith.constant 0 : index -# CHECK: %c5 = arith.constant 5 : index -# CHECK: %c1_3 = arith.constant 1 : index -# CHECK: scf.for %arg0 = %c0_2 to %c5 step %c1_3 { -# CHECK: aie.use_lock(%in_cons_cons_lock, AcquireGreaterEqual, 1) -# CHECK: %0 = memref.load %buffer_0_2[%c1] : memref<2xindex> -# CHECK: %1 = scf.index_switch %0 -> memref<10xi32> -# CHECK: case 0 { -# CHECK: scf.yield %in_cons_buff_0 : memref<10xi32> -# CHECK: } -# CHECK: case 1 { -# CHECK: scf.yield %in_cons_buff_1 : memref<10xi32> -# CHECK: } -# CHECK: default { -# CHECK: scf.yield %in_cons_buff_0 : memref<10xi32> -# CHECK: } -# CHECK: %c0_4 = arith.constant 0 : index -# CHECK: %c5_5 = arith.constant 5 : index -# CHECK: %c1_6 = arith.constant 1 : index -# CHECK: scf.for %arg1 = %c0_4 to %c5_5 step %c1_6 { -# CHECK: aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1) -# CHECK: %5 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -# CHECK: %6 = scf.index_switch %5 -> memref<10xi32> -# CHECK: case 0 { -# CHECK: scf.yield %out_buff_0 : memref<10xi32> -# CHECK: } -# CHECK: case 1 { -# CHECK: scf.yield %out_buff_1 : memref<10xi32> -# CHECK: } -# CHECK: default { -# CHECK: scf.yield %out_buff_0 : memref<10xi32> -# CHECK: } -# CHECK: func.call @passthrough_10_i32(%1, %6) : (memref<10xi32>, memref<10xi32>) -> () -# CHECK: aie.use_lock(%out_cons_lock, Release, 1) -# CHECK: %7 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -# CHECK: %c1_8 = arith.constant 1 : index -# CHECK: %8 = arith.addi %7, %c1_8 : index -# CHECK: %9 = arith.remsi %8, %c2 : index -# CHECK: memref.store %9, %buffer_0_2[%c0_0] : memref<2xindex> -# CHECK: } -# CHECK: aie.use_lock(%in_cons_prod_lock, Release, 1) -# CHECK: %2 = memref.load %buffer_0_2[%c1] : memref<2xindex> -# CHECK: %c1_7 = arith.constant 1 : index -# CHECK: %3 = arith.addi %2, %c1_7 : index -# CHECK: %4 = arith.remsi %3, %c2_1 : index -# CHECK: memref.store %4, %buffer_0_2[%c1] : memref<2xindex> -# CHECK: } -# CHECK: aie.end -# CHECK: } {link_with = "kernel.o"} +# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +# CHECK: PASS! import numpy as np @@ -119,16 +48,6 @@ def device_body(): "passthrough_10_i32", inputs=[tensor_ty, tensor_ty] ) - # Set up compute tiles - @core(ComputeTile, "kernel.o") - def core_body(): - for _ in range_(5): - elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - for _ in range_(5): - elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - passthrough_10_i32(elemIn, elemOut) - of_out.release(ObjectFifoPort.Produce, 1) - of_in.release(ObjectFifoPort.Consume, 1) # To/from AIE-array data movement @runtime_sequence(tensor_ty, tensor_ty) From 7797c5da91c38b813d9e725daeccca3a0a048bf3 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 15:25:20 -0700 Subject: [PATCH 30/46] Remove inner loop --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 68ca1bce14..6ad086d3a4 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -48,6 +48,16 @@ def device_body(): "passthrough_10_i32", inputs=[tensor_ty, tensor_ty] ) + # Set up compute tiles + @core(ComputeTile, "kernel.o") + def core_body(): + for _ in range_(5): + elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + # for _ in range_(5): + # elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + # passthrough_10_i32(elemIn, elemOut) + # of_out.release(ObjectFifoPort.Produce, 1) + of_in.release(ObjectFifoPort.Consume, 1) # To/from AIE-array data movement @runtime_sequence(tensor_ty, tensor_ty) From a8eec7bb6b501611bc43b8ccb875d2c01a08f5f6 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 15:42:04 -0700 Subject: [PATCH 31/46] Remove body of outer loop --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 6ad086d3a4..0638d9b59e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -52,12 +52,12 @@ def device_body(): @core(ComputeTile, "kernel.o") def core_body(): for _ in range_(5): - elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + # elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) # for _ in range_(5): # elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) # passthrough_10_i32(elemIn, elemOut) # of_out.release(ObjectFifoPort.Produce, 1) - of_in.release(ObjectFifoPort.Consume, 1) + # of_in.release(ObjectFifoPort.Consume, 1) # To/from AIE-array data movement @runtime_sequence(tensor_ty, tensor_ty) From ada9589d16f1989b13d6cd1f157711a35807527d Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 3 Dec 2024 16:00:12 -0700 Subject: [PATCH 32/46] Problem maybe because of the usage of index_cast for switch index --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 0638d9b59e..8ae31698ab 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -40,8 +40,8 @@ def device_body(): ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 1, tensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 1, tensor_ty) # AIE Core Function declarations passthrough_10_i32 = external_func( @@ -52,12 +52,12 @@ def device_body(): @core(ComputeTile, "kernel.o") def core_body(): for _ in range_(5): - # elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) # for _ in range_(5): # elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) # passthrough_10_i32(elemIn, elemOut) # of_out.release(ObjectFifoPort.Produce, 1) - # of_in.release(ObjectFifoPort.Consume, 1) + of_in.release(ObjectFifoPort.Consume, 1) # To/from AIE-array data movement @runtime_sequence(tensor_ty, tensor_ty) From 7e05750aa4d1ffed1e77846393d2f8f8c97af71d Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 11:08:01 -0700 Subject: [PATCH 33/46] Global buffer (?) --- .../Transforms/AIEObjectFifoStatefulTransform.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index 0d2e6c5821..437ec55fb5 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -1130,13 +1130,13 @@ struct AIEObjectFifoStatefulTransformPass // - globalNextIndex: load index and use it to index_switch (one // IndexSwithOp per AccessOp) WalkResult res = coreOp.walk([&](Operation *op) { - if (auto relOp = dyn_cast(op)) { - ObjectFifoCreateOp createOp = relOp.getObjectFifo(); - ObjectFifoPort port = relOp.getPort(); - updateGlobalNextIndex(builder, relOp, globalNextIndex, - globalIndices[{createOp, port}], - constantSizes[{createOp, port}]); - } + // if (auto relOp = dyn_cast(op)) { + // ObjectFifoCreateOp createOp = relOp.getObjectFifo(); + // ObjectFifoPort port = relOp.getPort(); + // updateGlobalNextIndex(builder, relOp, globalNextIndex, + // globalIndices[{createOp, port}], + // constantSizes[{createOp, port}]); + // } if (auto acqOp = dyn_cast(op)) { std::vector accessOps; for (auto u : acqOp->getUsers()) From dd69ad786899b0fd22b132c75d363ead8f6100b1 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 11:38:03 -0700 Subject: [PATCH 34/46] Previous one didn't remove the global buffer completely --- .../AIEObjectFifoStatefulTransform.cpp | 214 +++++++++--------- 1 file changed, 107 insertions(+), 107 deletions(-) diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index 437ec55fb5..c96730808d 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -1089,113 +1089,113 @@ struct AIEObjectFifoStatefulTransformPass fifoSizes[{op, port}] = op.size(); }); builder.setInsertionPoint(coreOp); - auto memrefTy = - MemRefType::get(SmallVector{(int64_t)fifoSizes.size()}, - builder.getIndexType()); - auto globalNextIndex = builder.create( - builder.getUnknownLoc(), memrefTy, coreOp.getTile(), - /*sym_name*/ nullptr, /*address*/ nullptr, - /*initial_value*/ nullptr, /*mem_bank*/ nullptr); - - // Initialize all counters in the global buffers to 0. - // Also, keep a map of the ConstantOps for the indices per OF - // and a map with the ConstantOps for the sizes per OF. - std::map, - arith::ConstantOp> - globalIndices; - std::map, - arith::ConstantOp> - constantSizes; - int index = 0; - builder.setInsertionPointToStart(&(coreOp.getBody().front())); - Value initVal = builder.create( - builder.getUnknownLoc(), builder.getIndexAttr(0)); - for (auto i : fifoSizes) { - auto indexOp = builder.create( - initVal.getLoc(), builder.getIndexAttr(index)); - globalIndices[i.first] = indexOp; - index++; - auto size = builder.create( - indexOp.getLoc(), builder.getIndexAttr(i.second)); - constantSizes[i.first] = size; - builder.create( - size.getLoc(), initVal, globalNextIndex, - ValueRange(ArrayRef({indexOp.getResult()}))); - } - - // Walk the code: - // - after each ObjectFifoReleaseOp: - // - globalNextIndex: add #rel modulo objfifo depth - // - before each ObjectFifoAcquireOp: - // - globalNextIndex: load index and use it to index_switch (one - // IndexSwithOp per AccessOp) - WalkResult res = coreOp.walk([&](Operation *op) { - // if (auto relOp = dyn_cast(op)) { - // ObjectFifoCreateOp createOp = relOp.getObjectFifo(); - // ObjectFifoPort port = relOp.getPort(); - // updateGlobalNextIndex(builder, relOp, globalNextIndex, - // globalIndices[{createOp, port}], - // constantSizes[{createOp, port}]); - // } - if (auto acqOp = dyn_cast(op)) { - std::vector accessOps; - for (auto u : acqOp->getUsers()) - if (auto accessOp = dyn_cast(u)) - accessOps.push_back(accessOp); - - for (auto accessOp : accessOps) { - ObjectFifoCreateOp createOp = acqOp.getObjectFifo(); - ObjectFifoPort port = acqOp.getPort(); - - // Single switch case - if (fifoSizes[{createOp, port}] == 1) - return WalkResult::advance(); - - // Create a switch for each subview access - builder.setInsertionPointAfter(accessOp); - auto switchIndex = builder.create( - builder.getUnknownLoc(), globalNextIndex, - ValueRange( - ArrayRef({globalIndices[{createOp, port}].getResult()}))); - unsigned caseRegionCounts = fifoSizes[{createOp, port}]; - SmallVector caseValues; - for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { - caseValues.push_back(i); - } - auto cases = - DenseI64ArrayAttr::get(builder.getContext(), caseValues); - auto switchOp = builder.create( - switchIndex.getLoc(), - TypeRange({buffersPerFifo[createOp][0].getType()}), - switchIndex, cases, caseRegionCounts); - // Create default case of IndexSwitchOp - builder.createBlock(&switchOp.getDefaultRegion()); - auto bufferIndex = (accessOp.getIndex()) % createOp.size(); - builder.setInsertionPointToStart(&(switchOp.getDefaultBlock())); - builder.create( - builder.getUnknownLoc(), - buffersPerFifo[createOp][bufferIndex].getResult()); - for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { - // Create other cases of IndexSwitchOp - builder.createBlock(&switchOp.getCaseRegions()[i]); - builder.setInsertionPoint(&switchOp.getCaseBlock(i), - switchOp.getCaseBlock(i).begin()); - int bufferToBeAccesed = - (accessOp.getIndex() + i) % fifoSizes[{createOp, port}]; - builder.create( - switchOp.getCaseRegions()[i].getLoc(), - buffersPerFifo[createOp][bufferToBeAccesed].getResult()); - } - - // Replace all uses of accessed objectfifo buffers with - // results of switchOps - accessOp.getOutput().replaceAllUsesWith(switchOp.getResult(0)); - } - } - return WalkResult::advance(); - }); - if (res.wasInterrupted()) - return failure(); + // auto memrefTy = + // MemRefType::get(SmallVector{(int64_t)fifoSizes.size()}, + // builder.getIndexType()); + // auto globalNextIndex = builder.create( + // builder.getUnknownLoc(), memrefTy, coreOp.getTile(), + // /*sym_name*/ nullptr, /*address*/ nullptr, + // /*initial_value*/ nullptr, /*mem_bank*/ nullptr); + + // // Initialize all counters in the global buffers to 0. + // // Also, keep a map of the ConstantOps for the indices per OF + // // and a map with the ConstantOps for the sizes per OF. + // std::map, + // arith::ConstantOp> + // globalIndices; + // std::map, + // arith::ConstantOp> + // constantSizes; + // int index = 0; + // builder.setInsertionPointToStart(&(coreOp.getBody().front())); + // Value initVal = builder.create( + // builder.getUnknownLoc(), builder.getIndexAttr(0)); + // for (auto i : fifoSizes) { + // auto indexOp = builder.create( + // initVal.getLoc(), builder.getIndexAttr(index)); + // globalIndices[i.first] = indexOp; + // index++; + // auto size = builder.create( + // indexOp.getLoc(), builder.getIndexAttr(i.second)); + // constantSizes[i.first] = size; + // builder.create( + // size.getLoc(), initVal, globalNextIndex, + // ValueRange(ArrayRef({indexOp.getResult()}))); + // } + + // // Walk the code: + // // - after each ObjectFifoReleaseOp: + // // - globalNextIndex: add #rel modulo objfifo depth + // // - before each ObjectFifoAcquireOp: + // // - globalNextIndex: load index and use it to index_switch (one + // // IndexSwithOp per AccessOp) + // WalkResult res = coreOp.walk([&](Operation *op) { + // if (auto relOp = dyn_cast(op)) { + // ObjectFifoCreateOp createOp = relOp.getObjectFifo(); + // ObjectFifoPort port = relOp.getPort(); + // updateGlobalNextIndex(builder, relOp, globalNextIndex, + // globalIndices[{createOp, port}], + // constantSizes[{createOp, port}]); + // } + // if (auto acqOp = dyn_cast(op)) { + // std::vector accessOps; + // for (auto u : acqOp->getUsers()) + // if (auto accessOp = dyn_cast(u)) + // accessOps.push_back(accessOp); + + // for (auto accessOp : accessOps) { + // ObjectFifoCreateOp createOp = acqOp.getObjectFifo(); + // ObjectFifoPort port = acqOp.getPort(); + + // // Single switch case + // if (fifoSizes[{createOp, port}] == 1) + // return WalkResult::advance(); + + // // Create a switch for each subview access + // builder.setInsertionPointAfter(accessOp); + // auto switchIndex = builder.create( + // builder.getUnknownLoc(), globalNextIndex, + // ValueRange( + // ArrayRef({globalIndices[{createOp, port}].getResult()}))); + // unsigned caseRegionCounts = fifoSizes[{createOp, port}]; + // SmallVector caseValues; + // for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { + // caseValues.push_back(i); + // } + // auto cases = + // DenseI64ArrayAttr::get(builder.getContext(), caseValues); + // auto switchOp = builder.create( + // switchIndex.getLoc(), + // TypeRange({buffersPerFifo[createOp][0].getType()}), + // switchIndex, cases, caseRegionCounts); + // // Create default case of IndexSwitchOp + // builder.createBlock(&switchOp.getDefaultRegion()); + // auto bufferIndex = (accessOp.getIndex()) % createOp.size(); + // builder.setInsertionPointToStart(&(switchOp.getDefaultBlock())); + // builder.create( + // builder.getUnknownLoc(), + // buffersPerFifo[createOp][bufferIndex].getResult()); + // for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { + // // Create other cases of IndexSwitchOp + // builder.createBlock(&switchOp.getCaseRegions()[i]); + // builder.setInsertionPoint(&switchOp.getCaseBlock(i), + // switchOp.getCaseBlock(i).begin()); + // int bufferToBeAccesed = + // (accessOp.getIndex() + i) % fifoSizes[{createOp, port}]; + // builder.create( + // switchOp.getCaseRegions()[i].getLoc(), + // buffersPerFifo[createOp][bufferToBeAccesed].getResult()); + // } + + // // Replace all uses of accessed objectfifo buffers with + // // results of switchOps + // accessOp.getOutput().replaceAllUsesWith(switchOp.getResult(0)); + // } + // } + // return WalkResult::advance(); + // }); + // if (res.wasInterrupted()) + // return failure(); } } return success(); From 6d3f7502c6d4a25848a364f07302e56727d93dbc Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 13:06:33 -0700 Subject: [PATCH 35/46] Confirming that global buffer is the problem --- .../AIEObjectFifoStatefulTransform.cpp | 214 +++++++++--------- 1 file changed, 107 insertions(+), 107 deletions(-) diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index c96730808d..5af8b87fde 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -1089,113 +1089,113 @@ struct AIEObjectFifoStatefulTransformPass fifoSizes[{op, port}] = op.size(); }); builder.setInsertionPoint(coreOp); - // auto memrefTy = - // MemRefType::get(SmallVector{(int64_t)fifoSizes.size()}, - // builder.getIndexType()); - // auto globalNextIndex = builder.create( - // builder.getUnknownLoc(), memrefTy, coreOp.getTile(), - // /*sym_name*/ nullptr, /*address*/ nullptr, - // /*initial_value*/ nullptr, /*mem_bank*/ nullptr); - - // // Initialize all counters in the global buffers to 0. - // // Also, keep a map of the ConstantOps for the indices per OF - // // and a map with the ConstantOps for the sizes per OF. - // std::map, - // arith::ConstantOp> - // globalIndices; - // std::map, - // arith::ConstantOp> - // constantSizes; - // int index = 0; - // builder.setInsertionPointToStart(&(coreOp.getBody().front())); - // Value initVal = builder.create( - // builder.getUnknownLoc(), builder.getIndexAttr(0)); - // for (auto i : fifoSizes) { - // auto indexOp = builder.create( - // initVal.getLoc(), builder.getIndexAttr(index)); - // globalIndices[i.first] = indexOp; - // index++; - // auto size = builder.create( - // indexOp.getLoc(), builder.getIndexAttr(i.second)); - // constantSizes[i.first] = size; - // builder.create( - // size.getLoc(), initVal, globalNextIndex, - // ValueRange(ArrayRef({indexOp.getResult()}))); - // } - - // // Walk the code: - // // - after each ObjectFifoReleaseOp: - // // - globalNextIndex: add #rel modulo objfifo depth - // // - before each ObjectFifoAcquireOp: - // // - globalNextIndex: load index and use it to index_switch (one - // // IndexSwithOp per AccessOp) - // WalkResult res = coreOp.walk([&](Operation *op) { - // if (auto relOp = dyn_cast(op)) { - // ObjectFifoCreateOp createOp = relOp.getObjectFifo(); - // ObjectFifoPort port = relOp.getPort(); - // updateGlobalNextIndex(builder, relOp, globalNextIndex, - // globalIndices[{createOp, port}], - // constantSizes[{createOp, port}]); - // } - // if (auto acqOp = dyn_cast(op)) { - // std::vector accessOps; - // for (auto u : acqOp->getUsers()) - // if (auto accessOp = dyn_cast(u)) - // accessOps.push_back(accessOp); - - // for (auto accessOp : accessOps) { - // ObjectFifoCreateOp createOp = acqOp.getObjectFifo(); - // ObjectFifoPort port = acqOp.getPort(); - - // // Single switch case - // if (fifoSizes[{createOp, port}] == 1) - // return WalkResult::advance(); - - // // Create a switch for each subview access - // builder.setInsertionPointAfter(accessOp); - // auto switchIndex = builder.create( - // builder.getUnknownLoc(), globalNextIndex, - // ValueRange( - // ArrayRef({globalIndices[{createOp, port}].getResult()}))); - // unsigned caseRegionCounts = fifoSizes[{createOp, port}]; - // SmallVector caseValues; - // for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { - // caseValues.push_back(i); - // } - // auto cases = - // DenseI64ArrayAttr::get(builder.getContext(), caseValues); - // auto switchOp = builder.create( - // switchIndex.getLoc(), - // TypeRange({buffersPerFifo[createOp][0].getType()}), - // switchIndex, cases, caseRegionCounts); - // // Create default case of IndexSwitchOp - // builder.createBlock(&switchOp.getDefaultRegion()); - // auto bufferIndex = (accessOp.getIndex()) % createOp.size(); - // builder.setInsertionPointToStart(&(switchOp.getDefaultBlock())); - // builder.create( - // builder.getUnknownLoc(), - // buffersPerFifo[createOp][bufferIndex].getResult()); - // for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { - // // Create other cases of IndexSwitchOp - // builder.createBlock(&switchOp.getCaseRegions()[i]); - // builder.setInsertionPoint(&switchOp.getCaseBlock(i), - // switchOp.getCaseBlock(i).begin()); - // int bufferToBeAccesed = - // (accessOp.getIndex() + i) % fifoSizes[{createOp, port}]; - // builder.create( - // switchOp.getCaseRegions()[i].getLoc(), - // buffersPerFifo[createOp][bufferToBeAccesed].getResult()); - // } - - // // Replace all uses of accessed objectfifo buffers with - // // results of switchOps - // accessOp.getOutput().replaceAllUsesWith(switchOp.getResult(0)); - // } - // } - // return WalkResult::advance(); - // }); - // if (res.wasInterrupted()) - // return failure(); + auto memrefTy = + MemRefType::get(SmallVector{(int64_t)fifoSizes.size()}, + builder.getIndexType()); + auto globalNextIndex = builder.create( + builder.getUnknownLoc(), memrefTy, coreOp.getTile(), + /*sym_name*/ nullptr, /*address*/ nullptr, + /*initial_value*/ nullptr, /*mem_bank*/ nullptr); + + // Initialize all counters in the global buffers to 0. + // Also, keep a map of the ConstantOps for the indices per OF + // and a map with the ConstantOps for the sizes per OF. + std::map, + arith::ConstantOp> + globalIndices; + std::map, + arith::ConstantOp> + constantSizes; + int index = 0; + builder.setInsertionPointToStart(&(coreOp.getBody().front())); + Value initVal = builder.create( + builder.getUnknownLoc(), builder.getIndexAttr(0)); + for (auto i : fifoSizes) { + auto indexOp = builder.create( + initVal.getLoc(), builder.getIndexAttr(index)); + globalIndices[i.first] = indexOp; + index++; + auto size = builder.create( + indexOp.getLoc(), builder.getIndexAttr(i.second)); + constantSizes[i.first] = size; + // builder.create( + // size.getLoc(), initVal, globalNextIndex, + // ValueRange(ArrayRef({indexOp.getResult()}))); + } + + // Walk the code: + // - after each ObjectFifoReleaseOp: + // - globalNextIndex: add #rel modulo objfifo depth + // - before each ObjectFifoAcquireOp: + // - globalNextIndex: load index and use it to index_switch (one + // IndexSwithOp per AccessOp) + WalkResult res = coreOp.walk([&](Operation *op) { + if (auto relOp = dyn_cast(op)) { + ObjectFifoCreateOp createOp = relOp.getObjectFifo(); + ObjectFifoPort port = relOp.getPort(); + updateGlobalNextIndex(builder, relOp, globalNextIndex, + globalIndices[{createOp, port}], + constantSizes[{createOp, port}]); + } + if (auto acqOp = dyn_cast(op)) { + std::vector accessOps; + for (auto u : acqOp->getUsers()) + if (auto accessOp = dyn_cast(u)) + accessOps.push_back(accessOp); + + for (auto accessOp : accessOps) { + ObjectFifoCreateOp createOp = acqOp.getObjectFifo(); + ObjectFifoPort port = acqOp.getPort(); + + // Single switch case + if (fifoSizes[{createOp, port}] == 1) + return WalkResult::advance(); + + // Create a switch for each subview access + builder.setInsertionPointAfter(accessOp); + auto switchIndex = builder.create( + builder.getUnknownLoc(), globalNextIndex, + ValueRange( + ArrayRef({globalIndices[{createOp, port}].getResult()}))); + unsigned caseRegionCounts = fifoSizes[{createOp, port}]; + SmallVector caseValues; + for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { + caseValues.push_back(i); + } + auto cases = + DenseI64ArrayAttr::get(builder.getContext(), caseValues); + auto switchOp = builder.create( + switchIndex.getLoc(), + TypeRange({buffersPerFifo[createOp][0].getType()}), + switchIndex, cases, caseRegionCounts); + // Create default case of IndexSwitchOp + builder.createBlock(&switchOp.getDefaultRegion()); + auto bufferIndex = (accessOp.getIndex()) % createOp.size(); + builder.setInsertionPointToStart(&(switchOp.getDefaultBlock())); + builder.create( + builder.getUnknownLoc(), + buffersPerFifo[createOp][bufferIndex].getResult()); + for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { + // Create other cases of IndexSwitchOp + builder.createBlock(&switchOp.getCaseRegions()[i]); + builder.setInsertionPoint(&switchOp.getCaseBlock(i), + switchOp.getCaseBlock(i).begin()); + int bufferToBeAccesed = + (accessOp.getIndex() + i) % fifoSizes[{createOp, port}]; + builder.create( + switchOp.getCaseRegions()[i].getLoc(), + buffersPerFifo[createOp][bufferToBeAccesed].getResult()); + } + + // Replace all uses of accessed objectfifo buffers with + // results of switchOps + accessOp.getOutput().replaceAllUsesWith(switchOp.getResult(0)); + } + } + return WalkResult::advance(); + }); + if (res.wasInterrupted()) + return failure(); } } return success(); From 71cbd258186b6810302e0392f96bb9f510bc471b Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 14:10:08 -0700 Subject: [PATCH 36/46] Solutionworked locally --- .../AIEObjectFifoStatefulTransform.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index 5af8b87fde..6e9af55a07 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -1061,7 +1061,7 @@ struct AIEObjectFifoStatefulTransformPass builder.getUnknownLoc(), globalNextIndex, ValueRange(ArrayRef({index.getResult()}))); Value val = builder.create( - oldCounter.getLoc(), builder.getIndexAttr(relOp.getSize())); + oldCounter.getLoc(), builder.getI32IntegerAttr(relOp.getSize())); Value sum = builder.create(val.getLoc(), oldCounter, val); Value newCounter = builder.create(sum.getLoc(), sum, size); builder.create(size.getLoc(), newCounter, globalNextIndex, @@ -1091,7 +1091,7 @@ struct AIEObjectFifoStatefulTransformPass builder.setInsertionPoint(coreOp); auto memrefTy = MemRefType::get(SmallVector{(int64_t)fifoSizes.size()}, - builder.getIndexType()); + builder.getI32Type()); auto globalNextIndex = builder.create( builder.getUnknownLoc(), memrefTy, coreOp.getTile(), /*sym_name*/ nullptr, /*address*/ nullptr, @@ -1109,18 +1109,18 @@ struct AIEObjectFifoStatefulTransformPass int index = 0; builder.setInsertionPointToStart(&(coreOp.getBody().front())); Value initVal = builder.create( - builder.getUnknownLoc(), builder.getIndexAttr(0)); + builder.getUnknownLoc(), builder.getI32IntegerAttr(0)); for (auto i : fifoSizes) { auto indexOp = builder.create( initVal.getLoc(), builder.getIndexAttr(index)); globalIndices[i.first] = indexOp; index++; auto size = builder.create( - indexOp.getLoc(), builder.getIndexAttr(i.second)); + indexOp.getLoc(), builder.getI32IntegerAttr(i.second)); constantSizes[i.first] = size; - // builder.create( - // size.getLoc(), initVal, globalNextIndex, - // ValueRange(ArrayRef({indexOp.getResult()}))); + builder.create( + size.getLoc(), initVal, globalNextIndex, + ValueRange(ArrayRef({indexOp.getResult()}))); } // Walk the code: @@ -1153,10 +1153,13 @@ struct AIEObjectFifoStatefulTransformPass // Create a switch for each subview access builder.setInsertionPointAfter(accessOp); - auto switchIndex = builder.create( + auto switchIndexAsInteger = builder.create( builder.getUnknownLoc(), globalNextIndex, ValueRange( ArrayRef({globalIndices[{createOp, port}].getResult()}))); + auto switchIndex = builder.create( + builder.getUnknownLoc(), builder.getIndexType(), + switchIndexAsInteger); unsigned caseRegionCounts = fifoSizes[{createOp, port}]; SmallVector caseValues; for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) { From a6f7e11ba6bab6f4ffc1438e7aaaca761dc122a8 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 14:18:48 -0700 Subject: [PATCH 37/46] Removing unnecessary changes --- .../npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 12 ++++++------ .../dynamic_object_fifo/sliding_window/aie2.py | 4 ++-- .../sliding_window_conditional/aie2.py | 4 ++-- .../two_core_sliding_window/aie2.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 8ae31698ab..e9dc107466 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -40,8 +40,8 @@ def device_body(): ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 1, tensor_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 1, tensor_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty) # AIE Core Function declarations passthrough_10_i32 = external_func( @@ -53,10 +53,10 @@ def device_body(): def core_body(): for _ in range_(5): elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - # for _ in range_(5): - # elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - # passthrough_10_i32(elemIn, elemOut) - # of_out.release(ObjectFifoPort.Produce, 1) + for _ in range_(5): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + passthrough_10_i32(elemIn, elemOut) + of_out.release(ObjectFifoPort.Produce, 1) of_in.release(ObjectFifoPort.Consume, 1) # To/from AIE-array data movement diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 7baa366452..37222b8a78 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -5,13 +5,13 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, chess +# REQUIRES: ryzen_ai, valid_xchess_license # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +# RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! from aie.dialects.aie import * from aie.dialects.aiex import * diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index 3f1159da13..366552907b 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -5,13 +5,13 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, chess +# REQUIRES: ryzen_ai, valid_xchess_license # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +# RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! import numpy as np diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index 3c53c21cd8..d0b0f53d36 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -5,13 +5,13 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, chess +# REQUIRES: ryzen_ai, valid_xchess_license # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +# RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! import numpy as np From 265dde0447a271c99dfe70e6604a445cddc218eb Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 14:34:04 -0700 Subject: [PATCH 38/46] Removed these from programming examples and modified tests: 2 sliding_window examples are still failing --- .../dyn_objFifo/lit.local.cfg | 11 -- .../dyn_objFifo/nested_loops/Makefile | 66 -------- .../dyn_objFifo/nested_loops/aie.mlir | 36 ----- .../dyn_objFifo/nested_loops/run_makefile.lit | 9 -- .../dyn_objFifo/sliding_window/Makefile | 66 -------- .../dyn_objFifo/sliding_window/aie2.py | 74 --------- .../dyn_objFifo/sliding_window/kernel.cc | 24 --- .../sliding_window/run_makefile.lit | 9 -- .../dyn_objFifo/sliding_window/test.cpp | 138 ----------------- .../sliding_window_conditional/Makefile | 66 -------- .../sliding_window_conditional/aie2.py | 73 --------- .../sliding_window_conditional/kernel.cc | 24 --- .../sliding_window_conditional/test.cpp | 138 ----------------- .../two_core_sliding_window/Makefile | 66 -------- .../two_core_sliding_window/aie2.py | 90 ----------- .../two_core_sliding_window/kernel.cc | 38 ----- .../two_core_sliding_window/test.cpp | 138 ----------------- .../dynamic_lowering_flag_test.mlir | 146 ++++++++++++++---- .../dynamic_lowering_test.mlir | 114 +++++++------- 19 files changed, 175 insertions(+), 1151 deletions(-) delete mode 100644 programming_examples/dyn_objFifo/lit.local.cfg delete mode 100644 programming_examples/dyn_objFifo/nested_loops/Makefile delete mode 100644 programming_examples/dyn_objFifo/nested_loops/aie.mlir delete mode 100644 programming_examples/dyn_objFifo/nested_loops/run_makefile.lit delete mode 100644 programming_examples/dyn_objFifo/sliding_window/Makefile delete mode 100644 programming_examples/dyn_objFifo/sliding_window/aie2.py delete mode 100644 programming_examples/dyn_objFifo/sliding_window/kernel.cc delete mode 100644 programming_examples/dyn_objFifo/sliding_window/run_makefile.lit delete mode 100644 programming_examples/dyn_objFifo/sliding_window/test.cpp delete mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/Makefile delete mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py delete mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc delete mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp delete mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/Makefile delete mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py delete mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc delete mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp diff --git a/programming_examples/dyn_objFifo/lit.local.cfg b/programming_examples/dyn_objFifo/lit.local.cfg deleted file mode 100644 index 64cca87fdf..0000000000 --- a/programming_examples/dyn_objFifo/lit.local.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2023 AMD Inc. - -config.suffixes = ['.lit'] - -if 'AIE2' not in config.vitis_components: - config.unsupported = True diff --git a/programming_examples/dyn_objFifo/nested_loops/Makefile b/programming_examples/dyn_objFifo/nested_loops/Makefile deleted file mode 100644 index 7a3b9545a5..0000000000 --- a/programming_examples/dyn_objFifo/nested_loops/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -##===- Makefile -----------------------------------------------------------===## -# -# This file licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# Copyright (C) 2024, Advanced Micro Devices, Inc. -# -##===----------------------------------------------------------------------===## - -# --- - -# The following environment variables that point to the Xilinx runtime (XRT) -# should be set up by an environment setup script already. -XILINX_XRT?=/opt/xilinx/xrt -XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) - -# --- - -srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) - -XILINX_XRT_INCLUDE?=${XILINX_XRT}/include -XILINX_XRT_LIB?=${XILINX_XRT}/lib - -CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include -XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} -XRT_LIBS=-lxrt_coreutil -CXX=g++-13 -ggdb - -#mlir_target?=build/aie.mlir -xclbin_target?=build/final.xclbin -insts_target?=build/insts.txt -host_target?=build/test - -.PHONY: all -all: ${xclbin_target} ${host_target} - -# build/aie.mlir: ${srcdir}/aie2.py -# mkdir -p ${@D} -# python3 $< > $@ - -build/kernel.o: ${srcdir}/kernel.cc - mkdir -p ${@D} - cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} - -${xclbin_target}: ${srcdir}/aie.mlir build/kernel.o - mkdir -p ${@D} - cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${srcdir}/${<:%=../%} - -${host_target}: ${srcdir}/test.cpp ${xclbin_target} - mkdir -p ${@D} - ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} - -.PHONY: run -run: ${host_target} - ./${host_target} - -xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh -.PHONY: sign -sign: ${xclbin_target} - ${xclbin_sign} -dev Phoenix -xclbin $< - -.PHONY: clean -clean: - -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/nested_loops/aie.mlir b/programming_examples/dyn_objFifo/nested_loops/aie.mlir deleted file mode 100644 index 1fb0cda89c..0000000000 --- a/programming_examples/dyn_objFifo/nested_loops/aie.mlir +++ /dev/null @@ -1,36 +0,0 @@ -module { - aie.device(npu1_1col) { - %tile_0_0 = aie.tile(0, 0) - %tile_0_2 = aie.tile(0, 2) - aie.objectfifo @in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> - aie.objectfifo @out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> - func.func private @passthrough_10_i32(memref<10xi32>, memref<10xi32>) - %core_0_2 = aie.core(%tile_0_2) { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5 : index - %c1 = arith.constant 1 : index - scf.for %arg0 = %c0 to %c5 step %c1 { - %0 = aie.objectfifo.acquire @in(Consume, 1) : !aie.objectfifosubview> - %1 = aie.objectfifo.subview.access %0[0] : !aie.objectfifosubview> -> memref<10xi32> - %c0_0 = arith.constant 0 : index - %c5_1 = arith.constant 5 : index - %c1_2 = arith.constant 1 : index - scf.for %arg1 = %c0_0 to %c5_1 step %c1_2 { - %2 = aie.objectfifo.acquire @out(Produce, 1) : !aie.objectfifosubview> - %3 = aie.objectfifo.subview.access %2[0] : !aie.objectfifosubview> -> memref<10xi32> - func.call @passthrough_10_i32(%1, %3) : (memref<10xi32>, memref<10xi32>) -> () - aie.objectfifo.release @out(Produce, 1) - } - aie.objectfifo.release @in(Consume, 1) - } - aie.end - } {link_with = "kernel.o"} - aiex.runtime_sequence(%arg0: memref<10xi32>, %arg1: memref<10xi32>) { - aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 50][0, 0, 0, 1]) {id = 1 : i64, issue_token = true, metadata = @in} : memref<10xi32> - aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 250][0, 0, 0, 1]) {id = 0 : i64, metadata = @out} : memref<10xi32> - aiex.npu.dma_wait {symbol = @in} - aiex.npu.dma_wait {symbol = @out} - } - } -} - diff --git a/programming_examples/dyn_objFifo/nested_loops/run_makefile.lit b/programming_examples/dyn_objFifo/nested_loops/run_makefile.lit deleted file mode 100644 index 507b70720a..0000000000 --- a/programming_examples/dyn_objFifo/nested_loops/run_makefile.lit +++ /dev/null @@ -1,9 +0,0 @@ -// (c) Copyright 2024 Advanced Micro Devices, Inc. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// REQUIRES: ryzen_ai, xchess -// -// RUN: make -f %S/Makefile clean -// RUN: make -f %S/Makefile -// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s -// CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window/Makefile b/programming_examples/dyn_objFifo/sliding_window/Makefile deleted file mode 100644 index 0216ac75da..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -##===- Makefile -----------------------------------------------------------===## -# -# This file licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# Copyright (C) 2024, Advanced Micro Devices, Inc. -# -##===----------------------------------------------------------------------===## - -# --- - -# The following environment variables that point to the Xilinx runtime (XRT) -# should be set up by an environment setup script already. -XILINX_XRT?=/opt/xilinx/xrt -XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) - -# --- - -srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) - -XILINX_XRT_INCLUDE?=${XILINX_XRT}/include -XILINX_XRT_LIB?=${XILINX_XRT}/lib - -CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include -XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} -XRT_LIBS=-lxrt_coreutil -CXX=g++-13 -ggdb - -#mlir_target?=build/aie.mlir -xclbin_target?=build/final.xclbin -insts_target?=build/insts.txt -host_target?=build/test - -.PHONY: all -all: ${xclbin_target} ${host_target} - -build/aie.mlir: ${srcdir}/aie2.py - mkdir -p ${@D} - python3 $< > $@ - -build/kernel.o: ${srcdir}/kernel.cc - mkdir -p ${@D} - cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} - -${xclbin_target}: build/aie.mlir build/kernel.o - mkdir -p ${@D} - cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} - -${host_target}: ${srcdir}/test.cpp ${xclbin_target} - mkdir -p ${@D} - ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} - -.PHONY: run -run: ${host_target} - ./${host_target} - -xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh -.PHONY: sign -sign: ${xclbin_target} - ${xclbin_sign} -dev Phoenix -xclbin $< - -.PHONY: clean -clean: - -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window/aie2.py b/programming_examples/dyn_objFifo/sliding_window/aie2.py deleted file mode 100644 index 57d5efb1a5..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window/aie2.py +++ /dev/null @@ -1,74 +0,0 @@ -# dynamic_object_fifo/sliding_window/aie2.py -*- Python -*- -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - -from aie.dialects.aie import * -from aie.dialects.aiex import * -from aie.helpers.dialects.ext.scf import _for as range_ -from aie.extras.context import mlir_mod_ctx - -N = 100 -n_rows = 10 -dev = AIEDevice.npu1_1col -col = 0 - - -def sliding_window(): - with mlir_mod_ctx() as ctx: - - @device(dev) - def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) - - # Tile declarations - ShimTile = tile(col, 0) - ComputeTile = tile(col, 2) - - # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) - - # AIE Core Function declarations - add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] - ) - - # Set up compute tiles - - @core(ComputeTile, "kernel.o") - def core_body(): - elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) - elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) - of_out.release(ObjectFifoPort.Produce, 1) - - for _ in range_(8): - elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) - of_in.release(ObjectFifoPort.Consume, 1) - of_out.release(ObjectFifoPort.Produce, 1) - - elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) - elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) - of_in.release(ObjectFifoPort.Consume, 2) - of_out.release(ObjectFifoPort.Produce, 1) - - # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) - - @runtime_sequence(tensor_ty, tensor_ty) - def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) - - print(ctx.module) - - -sliding_window() diff --git a/programming_examples/dyn_objFifo/sliding_window/kernel.cc b/programming_examples/dyn_objFifo/sliding_window/kernel.cc deleted file mode 100644 index ddb474e102..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window/kernel.cc +++ /dev/null @@ -1,24 +0,0 @@ -// -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// (c) Copyright 2024 AMD Inc. - -#include - -template -void add(const T_in *__restrict inA, const T_in *__restrict inB, - T_out *__restrict out) { - for (int i = 0; i < N; i++) { - out[i] = inA[i] + inB[i]; - } -} - -extern "C" { - -void add_10_i32(const int *__restrict inA, const int *__restrict inB, - int *__restrict out) { - add(inA, inB, out); -} -} diff --git a/programming_examples/dyn_objFifo/sliding_window/run_makefile.lit b/programming_examples/dyn_objFifo/sliding_window/run_makefile.lit deleted file mode 100644 index 507b70720a..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window/run_makefile.lit +++ /dev/null @@ -1,9 +0,0 @@ -// (c) Copyright 2024 Advanced Micro Devices, Inc. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// REQUIRES: ryzen_ai, xchess -// -// RUN: make -f %S/Makefile clean -// RUN: make -f %S/Makefile -// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s -// CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window/test.cpp b/programming_examples/dyn_objFifo/sliding_window/test.cpp deleted file mode 100644 index 3cd72ab880..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window/test.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// (c) Copyright 2024 AMD Inc. - -#include -#include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" - -#ifndef XCLBIN -#define XCLBIN "build/final.xclbin" -#endif - -#ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" -#endif - -#ifndef KERNEL_NAME -#define KERNEL_NAME "MLIR_AIE" -#endif - -#define INPUT_SIZE (100 * sizeof(int)) // in bytes -#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes -#define WIDTH_SIZE (10 * sizeof(int)) // in bytes -#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE -#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE - -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} - -int main(int argc, const char *argv[]) { - - std::vector instr_v = load_instr_sequence(INSTS_TXT); - assert(instr_v.size() > 0); - - // Get a device handle - unsigned int device_index = 0; - xrt::device device = xrt::device(device_index); - - // Load the xclbin - xrt::xclbin xclbin = xrt::xclbin(XCLBIN); - - // Get the kernel from the xclbin - std::vector xkernels = xclbin.get_kernels(); - xrt::xclbin::kernel xkernel = *std::find_if( - xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { - return k.get_name().rfind(KERNEL_NAME, 0) == 0; - }); - std::string kernel_name = xkernel.get_name(); - assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); - - device.register_xclbin(xclbin); - - // get a hardware context - xrt::hw_context context(device, xclbin.get_uuid()); - - // get a kernel handle - auto kernel = xrt::kernel(context, kernel_name); - - auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), - XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); - auto bo_input = - xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - auto bo_output = - xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - - int *buf_input = bo_input.map(); - std::cout << std::endl << std::endl << "Input: " << std::endl; - for (int i = 0; i < INPUT_ROWS; i++) { - std::cout << "row " << i << " : "; - for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { - buf_input[i * INPUT_ROWS + j] = i; - std::cout << buf_input[i * INPUT_ROWS + j] << " "; - } - std::cout << std::endl << std::endl; - } - int *buf_output = bo_output.map(); - memset(buf_output, 0, OUTPUT_SIZE); - - // Instruction buffer for DMA configuration - void *buf_instr = bo_instr.map(); - memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); - - bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); - - unsigned int opcode = 3; - auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); - ert_cmd_state r = run.wait(); - if (r != ERT_CMD_STATE_COMPLETED) { - std::cout << "Kernel did not complete. Returned status: " << r << "\n"; - return 1; - } - - bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - bool pass = true; - std::cout << std::endl << "Output: " << std::endl; - for (int i = 0; i < OUTPUT_ROWS; i++) { - std::cout << "row " << i << std::endl; - for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { - int expected_output = 0; - if (i == 0) { - expected_output = buf_input[i * INPUT_ROWS] * 2; - } else { - expected_output = - buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; - } - std::cout << "expected: " << expected_output << ", "; - std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; - pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; - } - std::cout << std::endl << std::endl; - } - std::cout << std::endl << std::endl; - std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; - - return 0; -} diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile b/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile deleted file mode 100644 index 0216ac75da..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -##===- Makefile -----------------------------------------------------------===## -# -# This file licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# Copyright (C) 2024, Advanced Micro Devices, Inc. -# -##===----------------------------------------------------------------------===## - -# --- - -# The following environment variables that point to the Xilinx runtime (XRT) -# should be set up by an environment setup script already. -XILINX_XRT?=/opt/xilinx/xrt -XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) - -# --- - -srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) - -XILINX_XRT_INCLUDE?=${XILINX_XRT}/include -XILINX_XRT_LIB?=${XILINX_XRT}/lib - -CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include -XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} -XRT_LIBS=-lxrt_coreutil -CXX=g++-13 -ggdb - -#mlir_target?=build/aie.mlir -xclbin_target?=build/final.xclbin -insts_target?=build/insts.txt -host_target?=build/test - -.PHONY: all -all: ${xclbin_target} ${host_target} - -build/aie.mlir: ${srcdir}/aie2.py - mkdir -p ${@D} - python3 $< > $@ - -build/kernel.o: ${srcdir}/kernel.cc - mkdir -p ${@D} - cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} - -${xclbin_target}: build/aie.mlir build/kernel.o - mkdir -p ${@D} - cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} - -${host_target}: ${srcdir}/test.cpp ${xclbin_target} - mkdir -p ${@D} - ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} - -.PHONY: run -run: ${host_target} - ./${host_target} - -xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh -.PHONY: sign -sign: ${xclbin_target} - ${xclbin_sign} -dev Phoenix -xclbin $< - -.PHONY: clean -clean: - -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py deleted file mode 100644 index 83719bc8e8..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py +++ /dev/null @@ -1,73 +0,0 @@ -# dynamic_object_fifo/sliding_window/aie2.py -*- Python -*- -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - -import numpy as np - -from aie.dialects.aie import * -from aie.dialects.aiex import * -from aie.helpers.dialects.ext.scf import _for as range_ -from aie.extras.context import mlir_mod_ctx - -N = 100 -n_rows = 10 -dev = AIEDevice.npu1_1col -col = 0 - - -def sliding_window(): - with mlir_mod_ctx() as ctx: - - @device(dev) - def device_body(): - subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] - - # Tile declarations - ShimTile = tile(col, 0) - ComputeTile = tile(col, 2) - - # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty) - - # AIE Core Function declarations - add_10_i32 = external_func( - "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] - ) - - # Set up compute tiles - @core(ComputeTile, "kernel.o") - def core_body(): - for i in range_(10): - elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - if i == 0: - elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) - add_10_i32(elemInPre, elemInPre, elemOut) - elif i == 9: - elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) - add_10_i32(elemsInPost[0], elemsInPost[1], elemOut) - of_in.release(ObjectFifoPort.Consume, 2) - else: - elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - add_10_i32(elemsIn[0], elemsIn[1], elemOut) - of_in.release(ObjectFifoPort.Consume, 1) - - of_out.release(ObjectFifoPort.Produce, 1) - - # To/from AIE-array data movement - tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] - - @runtime_sequence(tensor_ty, tensor_ty) - def sequence(A, C): - npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) - dma_wait(of_out) - - print(ctx.module) - - -sliding_window() diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc b/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc deleted file mode 100644 index ddb474e102..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc +++ /dev/null @@ -1,24 +0,0 @@ -// -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// (c) Copyright 2024 AMD Inc. - -#include - -template -void add(const T_in *__restrict inA, const T_in *__restrict inB, - T_out *__restrict out) { - for (int i = 0; i < N; i++) { - out[i] = inA[i] + inB[i]; - } -} - -extern "C" { - -void add_10_i32(const int *__restrict inA, const int *__restrict inB, - int *__restrict out) { - add(inA, inB, out); -} -} diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp b/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp deleted file mode 100644 index 3cd72ab880..0000000000 --- a/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// (c) Copyright 2024 AMD Inc. - -#include -#include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" - -#ifndef XCLBIN -#define XCLBIN "build/final.xclbin" -#endif - -#ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" -#endif - -#ifndef KERNEL_NAME -#define KERNEL_NAME "MLIR_AIE" -#endif - -#define INPUT_SIZE (100 * sizeof(int)) // in bytes -#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes -#define WIDTH_SIZE (10 * sizeof(int)) // in bytes -#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE -#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE - -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} - -int main(int argc, const char *argv[]) { - - std::vector instr_v = load_instr_sequence(INSTS_TXT); - assert(instr_v.size() > 0); - - // Get a device handle - unsigned int device_index = 0; - xrt::device device = xrt::device(device_index); - - // Load the xclbin - xrt::xclbin xclbin = xrt::xclbin(XCLBIN); - - // Get the kernel from the xclbin - std::vector xkernels = xclbin.get_kernels(); - xrt::xclbin::kernel xkernel = *std::find_if( - xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { - return k.get_name().rfind(KERNEL_NAME, 0) == 0; - }); - std::string kernel_name = xkernel.get_name(); - assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); - - device.register_xclbin(xclbin); - - // get a hardware context - xrt::hw_context context(device, xclbin.get_uuid()); - - // get a kernel handle - auto kernel = xrt::kernel(context, kernel_name); - - auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), - XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); - auto bo_input = - xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - auto bo_output = - xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - - int *buf_input = bo_input.map(); - std::cout << std::endl << std::endl << "Input: " << std::endl; - for (int i = 0; i < INPUT_ROWS; i++) { - std::cout << "row " << i << " : "; - for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { - buf_input[i * INPUT_ROWS + j] = i; - std::cout << buf_input[i * INPUT_ROWS + j] << " "; - } - std::cout << std::endl << std::endl; - } - int *buf_output = bo_output.map(); - memset(buf_output, 0, OUTPUT_SIZE); - - // Instruction buffer for DMA configuration - void *buf_instr = bo_instr.map(); - memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); - - bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); - - unsigned int opcode = 3; - auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); - ert_cmd_state r = run.wait(); - if (r != ERT_CMD_STATE_COMPLETED) { - std::cout << "Kernel did not complete. Returned status: " << r << "\n"; - return 1; - } - - bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - bool pass = true; - std::cout << std::endl << "Output: " << std::endl; - for (int i = 0; i < OUTPUT_ROWS; i++) { - std::cout << "row " << i << std::endl; - for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { - int expected_output = 0; - if (i == 0) { - expected_output = buf_input[i * INPUT_ROWS] * 2; - } else { - expected_output = - buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; - } - std::cout << "expected: " << expected_output << ", "; - std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; - pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; - } - std::cout << std::endl << std::endl; - } - std::cout << std::endl << std::endl; - std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; - - return 0; -} diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile b/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile deleted file mode 100644 index 4e423e1df1..0000000000 --- a/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -##===- Makefile -----------------------------------------------------------===## -# -# This file licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# Copyright (C) 2024, Advanced Micro Devices, Inc. -# -##===----------------------------------------------------------------------===## - -# --- - -# The following environment variables that point to the Xilinx runtime (XRT) -# should be set up by an environment setup script already. -XILINX_XRT?=/opt/xilinx/xrt -XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) - -# --- - -srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) - -XILINX_XRT_INCLUDE?=${XILINX_XRT}/include -XILINX_XRT_LIB?=${XILINX_XRT}/lib - -CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include -XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} -XRT_LIBS=-lxrt_coreutil -CXX=g++-13 -ggdb - -#mlir_target?=build/aie.mlir -xclbin_target?=build/final.xclbin -insts_target?=build/insts.txt -host_target?=build/test - -.PHONY: all -all: ${xclbin_target} ${host_target} - -build/aie.mlir: ${srcdir}/aie2.py - mkdir -p ${@D} - python3 $< > $@ - -build/kernel.o: ${srcdir}/kernel.cc - mkdir -p ${@D} - cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} - -${xclbin_target}: build/aie.mlir build/kernel.o - mkdir -p ${@D} - cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} - -${host_target}: ${srcdir}/test.cpp ${xclbin_target} - mkdir -p ${@D} - ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} - -.PHONY: run -run: ${host_target} - ./${host_target} - -xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh -.PHONY: sign -sign: ${xclbin_target} - ${xclbin_sign} -dev Phoenix -xclbin $< - -.PHONY: clean -clean: - -rm -r build diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py deleted file mode 100644 index c0d7c805ee..0000000000 --- a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py +++ /dev/null @@ -1,90 +0,0 @@ -# dynamic_object_fifo/two_core_sliding_window/aie2.py -*- Python -*- -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - -import numpy as np - -from aie.dialects.aie import * -from aie.dialects.aiex import * -from aie.helpers.dialects.ext.scf import _for as range_ -from aie.extras.context import mlir_mod_ctx - -N = 100 -n_rows = 10 -dev = AIEDevice.npu1_1col -col = 0 - - -def two_core_sliding_window(): - with mlir_mod_ctx() as ctx: - - @device(dev) - def device_body(): - subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] - - # Tile declarations - ShimTile = tile(col, 0) - ComputeTile = tile(col, 2) - ComputeTile2 = tile(col, 4) - - # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 2, subtensor_ty) - of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, subtensor_ty) - of_out = object_fifo("out", ComputeTile2, ShimTile, 2, subtensor_ty) - - # AIE Core Function declarations - passthrough_10_i32 = external_func( - "passthrough_10_i32", inputs=[subtensor_ty, subtensor_ty] - ) - add_10_i32 = external_func( - "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] - ) - - # Set up compute tiles - - @core(ComputeTile, "kernel.o") - def core_body(): - for _ in range_(10): - elemOut = of_in2.acquire(ObjectFifoPort.Produce, 1) - elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - passthrough_10_i32(elemIn, elemOut) - of_in.release(ObjectFifoPort.Consume, 1) - of_in2.release(ObjectFifoPort.Produce, 1) - - @core(ComputeTile2, "kernel.o") - def core_body(): - elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) - elemInPre = of_in2.acquire(ObjectFifoPort.Consume, 1) - add_10_i32(elemInPre, elemInPre, elemOutPre) - of_out.release(ObjectFifoPort.Produce, 1) - - for _ in range_(8): - elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - elemsIn = of_in2.acquire(ObjectFifoPort.Consume, 2) - add_10_i32(elemsIn[0], elemsIn[1], elemOut) - of_in2.release(ObjectFifoPort.Consume, 1) - of_out.release(ObjectFifoPort.Produce, 1) - - elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) - elemsInPost = of_in2.acquire(ObjectFifoPort.Consume, 2) - add_10_i32(elemsInPost[0], elemsInPost[1], elemOutPost) - of_in2.release(ObjectFifoPort.Consume, 2) - of_out.release(ObjectFifoPort.Produce, 1) - - # To/from AIE-array data movement - tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] - - @runtime_sequence(tensor_ty, tensor_ty) - def sequence(A, C): - npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) - dma_wait(of_out) - - print(ctx.module) - - -two_core_sliding_window() diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc b/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc deleted file mode 100644 index 7e4515193c..0000000000 --- a/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc +++ /dev/null @@ -1,38 +0,0 @@ -// -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// (c) Copyright 2024 AMD Inc. - -#include - -template -void passthrough(const T_in *__restrict in, T_out *__restrict out) { - for (int i = 0; i < N; i++) { - out[i] = in[i]; - } -} - -extern "C" { - -void passthrough_10_i32(const int *__restrict in, int *__restrict out) { - passthrough(in, out); -} -} - -template -void add(const T_in *__restrict inA, const T_in *__restrict inB, - T_out *__restrict out) { - for (int i = 0; i < N; i++) { - out[i] = inA[i] + inB[i]; - } -} - -extern "C" { - -void add_10_i32(const int *__restrict inA, const int *__restrict inB, - int *__restrict out) { - add(inA, inB, out); -} -} diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp b/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp deleted file mode 100644 index 3cd72ab880..0000000000 --- a/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// (c) Copyright 2024 AMD Inc. - -#include -#include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" - -#ifndef XCLBIN -#define XCLBIN "build/final.xclbin" -#endif - -#ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" -#endif - -#ifndef KERNEL_NAME -#define KERNEL_NAME "MLIR_AIE" -#endif - -#define INPUT_SIZE (100 * sizeof(int)) // in bytes -#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes -#define WIDTH_SIZE (10 * sizeof(int)) // in bytes -#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE -#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE - -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} - -int main(int argc, const char *argv[]) { - - std::vector instr_v = load_instr_sequence(INSTS_TXT); - assert(instr_v.size() > 0); - - // Get a device handle - unsigned int device_index = 0; - xrt::device device = xrt::device(device_index); - - // Load the xclbin - xrt::xclbin xclbin = xrt::xclbin(XCLBIN); - - // Get the kernel from the xclbin - std::vector xkernels = xclbin.get_kernels(); - xrt::xclbin::kernel xkernel = *std::find_if( - xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { - return k.get_name().rfind(KERNEL_NAME, 0) == 0; - }); - std::string kernel_name = xkernel.get_name(); - assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); - - device.register_xclbin(xclbin); - - // get a hardware context - xrt::hw_context context(device, xclbin.get_uuid()); - - // get a kernel handle - auto kernel = xrt::kernel(context, kernel_name); - - auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), - XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); - auto bo_input = - xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - auto bo_output = - xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - - int *buf_input = bo_input.map(); - std::cout << std::endl << std::endl << "Input: " << std::endl; - for (int i = 0; i < INPUT_ROWS; i++) { - std::cout << "row " << i << " : "; - for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { - buf_input[i * INPUT_ROWS + j] = i; - std::cout << buf_input[i * INPUT_ROWS + j] << " "; - } - std::cout << std::endl << std::endl; - } - int *buf_output = bo_output.map(); - memset(buf_output, 0, OUTPUT_SIZE); - - // Instruction buffer for DMA configuration - void *buf_instr = bo_instr.map(); - memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); - - bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); - - unsigned int opcode = 3; - auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); - ert_cmd_state r = run.wait(); - if (r != ERT_CMD_STATE_COMPLETED) { - std::cout << "Kernel did not complete. Returned status: " << r << "\n"; - return 1; - } - - bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - bool pass = true; - std::cout << std::endl << "Output: " << std::endl; - for (int i = 0; i < OUTPUT_ROWS; i++) { - std::cout << "row " << i << std::endl; - for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { - int expected_output = 0; - if (i == 0) { - expected_output = buf_input[i * INPUT_ROWS] * 2; - } else { - expected_output = - buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; - } - std::cout << "expected: " << expected_output << ", "; - std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; - pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; - } - std::cout << std::endl << std::endl; - } - std::cout << std::endl << std::endl; - std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; - - return 0; -} diff --git a/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir b/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir index e91c1f9f21..bdc59cf3a4 100644 --- a/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir +++ b/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir @@ -10,21 +10,66 @@ // RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s +// CHECK: aie.device(npu1_1col) { +// CHECK: memref.global "public" @output_fifo2_cons : memref<10xi32> +// CHECK: memref.global "public" @output_fifo2 : memref<10xi32> +// CHECK: memref.global "public" @input_fifo2_cons : memref<10xi32> +// CHECK: memref.global "public" @input_fifo2 : memref<10xi32> +// CHECK: memref.global "public" @output_fifo_cons : memref<10xi32> +// CHECK: memref.global "public" @output_fifo : memref<10xi32> +// CHECK: memref.global "public" @input_fifo_cons : memref<10xi32> +// CHECK: memref.global "public" @input_fifo : memref<10xi32> +// CHECK: func.func @passthrough_10_i32(%arg0: memref<10xi32>, %arg1: memref<10xi32>) { +// CHECK: return +// CHECK: } +// CHECK: %tile_0_0 = aie.tile(0, 0) +// CHECK: %tile_0_2 = aie.tile(0, 2) +// CHECK: %tile_0_4 = aie.tile(0, 4) +// CHECK: %output_fifo2_cons_prod_lock = aie.lock(%tile_0_0, 6) {init = 1 : i32, sym_name = "output_fifo2_cons_prod_lock"} +// CHECK: %output_fifo2_cons_cons_lock = aie.lock(%tile_0_0, 7) {init = 0 : i32, sym_name = "output_fifo2_cons_cons_lock"} +// CHECK: %output_fifo2_buff_0 = aie.buffer(%tile_0_4) {sym_name = "output_fifo2_buff_0"} : memref<10xi32> +// CHECK: %output_fifo2_buff_1 = aie.buffer(%tile_0_4) {sym_name = "output_fifo2_buff_1"} : memref<10xi32> +// CHECK: %output_fifo2_prod_lock = aie.lock(%tile_0_4, 2) {init = 2 : i32, sym_name = "output_fifo2_prod_lock"} +// CHECK: %output_fifo2_cons_lock = aie.lock(%tile_0_4, 3) {init = 0 : i32, sym_name = "output_fifo2_cons_lock"} +// CHECK: %input_fifo2_cons_buff_0 = aie.buffer(%tile_0_4) {sym_name = "input_fifo2_cons_buff_0"} : memref<10xi32> +// CHECK: %input_fifo2_cons_buff_1 = aie.buffer(%tile_0_4) {sym_name = "input_fifo2_cons_buff_1"} : memref<10xi32> +// CHECK: %input_fifo2_cons_prod_lock = aie.lock(%tile_0_4, 0) {init = 2 : i32, sym_name = "input_fifo2_cons_prod_lock"} +// CHECK: %input_fifo2_cons_cons_lock = aie.lock(%tile_0_4, 1) {init = 0 : i32, sym_name = "input_fifo2_cons_cons_lock"} +// CHECK: %input_fifo2_prod_lock = aie.lock(%tile_0_0, 4) {init = 1 : i32, sym_name = "input_fifo2_prod_lock"} +// CHECK: %input_fifo2_cons_lock = aie.lock(%tile_0_0, 5) {init = 0 : i32, sym_name = "input_fifo2_cons_lock"} +// CHECK: %output_fifo_cons_prod_lock = aie.lock(%tile_0_0, 2) {init = 1 : i32, sym_name = "output_fifo_cons_prod_lock"} +// CHECK: %output_fifo_cons_cons_lock = aie.lock(%tile_0_0, 3) {init = 0 : i32, sym_name = "output_fifo_cons_cons_lock"} +// CHECK: %output_fifo_buff_0 = aie.buffer(%tile_0_2) {sym_name = "output_fifo_buff_0"} : memref<10xi32> +// CHECK: %output_fifo_buff_1 = aie.buffer(%tile_0_2) {sym_name = "output_fifo_buff_1"} : memref<10xi32> +// CHECK: %output_fifo_prod_lock = aie.lock(%tile_0_2, 2) {init = 2 : i32, sym_name = "output_fifo_prod_lock"} +// CHECK: %output_fifo_cons_lock = aie.lock(%tile_0_2, 3) {init = 0 : i32, sym_name = "output_fifo_cons_lock"} +// CHECK: %input_fifo_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "input_fifo_cons_buff_0"} : memref<10xi32> +// CHECK: %input_fifo_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "input_fifo_cons_buff_1"} : memref<10xi32> +// CHECK: %input_fifo_cons_prod_lock = aie.lock(%tile_0_2, 0) {init = 2 : i32, sym_name = "input_fifo_cons_prod_lock"} +// CHECK: %input_fifo_cons_cons_lock = aie.lock(%tile_0_2, 1) {init = 0 : i32, sym_name = "input_fifo_cons_cons_lock"} +// CHECK: %input_fifo_prod_lock = aie.lock(%tile_0_0, 0) {init = 1 : i32, sym_name = "input_fifo_prod_lock"} +// CHECK: %input_fifo_cons_lock = aie.lock(%tile_0_0, 1) {init = 0 : i32, sym_name = "input_fifo_cons_lock"} +// CHECK: aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0) +// CHECK: aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0) +// CHECK: aie.flow(%tile_0_0, DMA : 1, %tile_0_4, DMA : 0) +// CHECK: aie.flow(%tile_0_4, DMA : 0, %tile_0_0, DMA : 1) +// CHECK: %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xi32> // CHECK: %core_0_2 = aie.core(%tile_0_2) { +// CHECK: %c0_i32 = arith.constant 0 : i32 // CHECK: %c0 = arith.constant 0 : index -// CHECK: %c0_0 = arith.constant 0 : index -// CHECK: %c2 = arith.constant 2 : index -// CHECK: memref.store %c0, %buffer_0_2[%c0_0] : memref<2xindex> +// CHECK: %c2_i32 = arith.constant 2 : i32 +// CHECK: memref.store %c0_i32, %buffer_0_2[%c0] : memref<2xi32> // CHECK: %c1 = arith.constant 1 : index -// CHECK: %c2_1 = arith.constant 2 : index -// CHECK: memref.store %c0, %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %c0_2 = arith.constant 0 : index -// CHECK: %c1_3 = arith.constant 1 : index +// CHECK: %c2_i32_0 = arith.constant 2 : i32 +// CHECK: memref.store %c0_i32, %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %c0_1 = arith.constant 0 : index +// CHECK: %c1_2 = arith.constant 1 : index // CHECK: %c10 = arith.constant 10 : index -// CHECK: scf.for %arg0 = %c0_2 to %c10 step %c1_3 { +// CHECK: scf.for %arg0 = %c0_1 to %c10 step %c1_2 { // CHECK: aie.use_lock(%output_fifo_prod_lock, AcquireGreaterEqual, 1) -// CHECK: %0 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %1 = scf.index_switch %0 -> memref<10xi32> +// CHECK: %0 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %1 = arith.index_cast %0 : i32 to index +// CHECK: %2 = scf.index_switch %1 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } @@ -35,8 +80,9 @@ // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } // CHECK: aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 1) -// CHECK: %2 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %3 = scf.index_switch %2 -> memref<10xi32> +// CHECK: %3 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %4 = arith.index_cast %3 : i32 to index +// CHECK: %5 = scf.index_switch %4 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } @@ -46,39 +92,75 @@ // CHECK: default { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } -// CHECK: func.call @passthrough_10_i32(%3, %1) : (memref<10xi32>, memref<10xi32>) -> () +// CHECK: func.call @passthrough_10_i32(%5, %2) : (memref<10xi32>, memref<10xi32>) -> () // CHECK: aie.use_lock(%input_fifo_cons_prod_lock, Release, 1) -// CHECK: %4 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %c1_4 = arith.constant 1 : index -// CHECK: %5 = arith.addi %4, %c1_4 : index -// CHECK: %6 = arith.remsi %5, %c2_1 : index -// CHECK: memref.store %6, %buffer_0_2[%c1] : memref<2xindex> +// CHECK: %6 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %c1_i32 = arith.constant 1 : i32 +// CHECK: %7 = arith.addi %6, %c1_i32 : i32 +// CHECK: %8 = arith.remsi %7, %c2_i32_0 : i32 +// CHECK: memref.store %8, %buffer_0_2[%c1] : memref<2xi32> // CHECK: aie.use_lock(%output_fifo_cons_lock, Release, 1) -// CHECK: %7 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %c1_5 = arith.constant 1 : index -// CHECK: %8 = arith.addi %7, %c1_5 : index -// CHECK: %9 = arith.remsi %8, %c2 : index -// CHECK: memref.store %9, %buffer_0_2[%c0_0] : memref<2xindex> +// CHECK: %9 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %c1_i32_3 = arith.constant 1 : i32 +// CHECK: %10 = arith.addi %9, %c1_i32_3 : i32 +// CHECK: %11 = arith.remsi %10, %c2_i32 : i32 +// CHECK: memref.store %11, %buffer_0_2[%c0] : memref<2xi32> // CHECK: } // CHECK: aie.end // CHECK: } {dynamic_objfifo_lowering = true} +// CHECK: %buffer_0_4 = aie.buffer(%tile_0_4) : memref<2xi32> // CHECK: %core_0_4 = aie.core(%tile_0_4) { +// CHECK: %c0_i32 = arith.constant 0 : i32 // CHECK: %c0 = arith.constant 0 : index +// CHECK: %c2_i32 = arith.constant 2 : i32 +// CHECK: memref.store %c0_i32, %buffer_0_4[%c0] : memref<2xi32> // CHECK: %c1 = arith.constant 1 : index +// CHECK: %c2_i32_0 = arith.constant 2 : i32 +// CHECK: memref.store %c0_i32, %buffer_0_4[%c1] : memref<2xi32> +// CHECK: %c0_1 = arith.constant 0 : index +// CHECK: %c1_2 = arith.constant 1 : index // CHECK: %c10 = arith.constant 10 : index -// CHECK: %c2 = arith.constant 2 : index -// CHECK: scf.for %arg0 = %c0 to %c10 step %c2 { -// CHECK: aie.use_lock(%output_fifo2_prod_lock, AcquireGreaterEqual, 1) -// CHECK: aie.use_lock(%input_fifo2_cons_cons_lock, AcquireGreaterEqual, 1) -// CHECK: func.call @passthrough_10_i32(%input_fifo2_cons_buff_0, %output_fifo2_buff_0) : (memref<10xi32>, memref<10xi32>) -> () -// CHECK: aie.use_lock(%input_fifo2_cons_prod_lock, Release, 1) -// CHECK: aie.use_lock(%output_fifo2_cons_lock, Release, 1) +// CHECK: scf.for %arg0 = %c0_1 to %c10 step %c1_2 { // CHECK: aie.use_lock(%output_fifo2_prod_lock, AcquireGreaterEqual, 1) +// CHECK: %0 = memref.load %buffer_0_4[%c0] : memref<2xi32> +// CHECK: %1 = arith.index_cast %0 : i32 to index +// CHECK: %2 = scf.index_switch %1 -> memref<10xi32> +// CHECK: case 0 { +// CHECK: scf.yield %output_fifo2_buff_0 : memref<10xi32> +// CHECK: } +// CHECK: case 1 { +// CHECK: scf.yield %output_fifo2_buff_1 : memref<10xi32> +// CHECK: } +// CHECK: default { +// CHECK: scf.yield %output_fifo2_buff_0 : memref<10xi32> +// CHECK: } // CHECK: aie.use_lock(%input_fifo2_cons_cons_lock, AcquireGreaterEqual, 1) -// CHECK: func.call @passthrough_10_i32(%input_fifo2_cons_buff_1, %output_fifo2_buff_1) : (memref<10xi32>, memref<10xi32>) -> () +// CHECK: %3 = memref.load %buffer_0_4[%c1] : memref<2xi32> +// CHECK: %4 = arith.index_cast %3 : i32 to index +// CHECK: %5 = scf.index_switch %4 -> memref<10xi32> +// CHECK: case 0 { +// CHECK: scf.yield %input_fifo2_cons_buff_0 : memref<10xi32> +// CHECK: } +// CHECK: case 1 { +// CHECK: scf.yield %input_fifo2_cons_buff_1 : memref<10xi32> +// CHECK: } +// CHECK: default { +// CHECK: scf.yield %input_fifo2_cons_buff_0 : memref<10xi32> +// CHECK: } +// CHECK: func.call @passthrough_10_i32(%5, %2) : (memref<10xi32>, memref<10xi32>) -> () // CHECK: aie.use_lock(%input_fifo2_cons_prod_lock, Release, 1) +// CHECK: %6 = memref.load %buffer_0_4[%c1] : memref<2xi32> +// CHECK: %c1_i32 = arith.constant 1 : i32 +// CHECK: %7 = arith.addi %6, %c1_i32 : i32 +// CHECK: %8 = arith.remsi %7, %c2_i32_0 : i32 +// CHECK: memref.store %8, %buffer_0_4[%c1] : memref<2xi32> // CHECK: aie.use_lock(%output_fifo2_cons_lock, Release, 1) -// CHECK: } +// CHECK: %9 = memref.load %buffer_0_4[%c0] : memref<2xi32> +// CHECK: %c1_i32_3 = arith.constant 1 : i32 +// CHECK: %10 = arith.addi %9, %c1_i32_3 : i32 +// CHECK: %11 = arith.remsi %10, %c2_i32 : i32 +// CHECK: memref.store %11, %buffer_0_4[%c0] : memref<2xi32> +// CHECK: } // CHECK: aie.end // CHECK: } // CHECK: aie.shim_dma_allocation @input_fifo(MM2S, 0, 0) diff --git a/test/objectFifo-stateful-transform/dynamic_lowering_test.mlir b/test/objectFifo-stateful-transform/dynamic_lowering_test.mlir index c169c8472e..087b8e5a2a 100644 --- a/test/objectFifo-stateful-transform/dynamic_lowering_test.mlir +++ b/test/objectFifo-stateful-transform/dynamic_lowering_test.mlir @@ -35,21 +35,22 @@ // CHECK: %input_fifo_cons_lock = aie.lock(%tile_0_0, 1) {init = 0 : i32, sym_name = "input_fifo_cons_lock"} // CHECK: aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0) // CHECK: aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0) -// CHECK: %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xindex> +// CHECK: %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xi32> // CHECK: %core_0_2 = aie.core(%tile_0_2) { +// CHECK: %c0_i32 = arith.constant 0 : i32 // CHECK: %c0 = arith.constant 0 : index -// CHECK: %c0_0 = arith.constant 0 : index -// CHECK: %c2 = arith.constant 2 : index -// CHECK: memref.store %c0, %buffer_0_2[%c0_0] : memref<2xindex> +// CHECK: %c2_i32 = arith.constant 2 : i32 +// CHECK: memref.store %c0_i32, %buffer_0_2[%c0] : memref<2xi32> // CHECK: %c1 = arith.constant 1 : index -// CHECK: %c3 = arith.constant 3 : index -// CHECK: memref.store %c0, %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %c0_1 = arith.constant 0 : index -// CHECK: %c1_2 = arith.constant 1 : index +// CHECK: %c3_i32 = arith.constant 3 : i32 +// CHECK: memref.store %c0_i32, %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %c0_0 = arith.constant 0 : index +// CHECK: %c1_1 = arith.constant 1 : index // CHECK: %c9 = arith.constant 9 : index // CHECK: aie.use_lock(%output_fifo_prod_lock, AcquireGreaterEqual, 1) -// CHECK: %0 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %1 = scf.index_switch %0 -> memref<10xi32> +// CHECK: %0 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %1 = arith.index_cast %0 : i32 to index +// CHECK: %2 = scf.index_switch %1 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } @@ -60,8 +61,9 @@ // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } // CHECK: aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 1) -// CHECK: %2 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %3 = scf.index_switch %2 -> memref<10xi32> +// CHECK: %3 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %4 = arith.index_cast %3 : i32 to index +// CHECK: %5 = scf.index_switch %4 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } @@ -74,17 +76,18 @@ // CHECK: default { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } -// CHECK: func.call @add_10_i32(%3, %3, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () +// CHECK: func.call @add_10_i32(%5, %5, %2) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () // CHECK: aie.use_lock(%output_fifo_cons_lock, Release, 1) -// CHECK: %4 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %c1_3 = arith.constant 1 : index -// CHECK: %5 = arith.addi %4, %c1_3 : index -// CHECK: %6 = arith.remsi %5, %c2 : index -// CHECK: memref.store %6, %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: scf.for %arg0 = %c0_1 to %c9 step %c1_2 { +// CHECK: %6 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %c1_i32 = arith.constant 1 : i32 +// CHECK: %7 = arith.addi %6, %c1_i32 : i32 +// CHECK: %8 = arith.remsi %7, %c2_i32 : i32 +// CHECK: memref.store %8, %buffer_0_2[%c0] : memref<2xi32> +// CHECK: scf.for %arg0 = %c0_0 to %c9 step %c1_1 { // CHECK: aie.use_lock(%output_fifo_prod_lock, AcquireGreaterEqual, 1) -// CHECK: %19 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %20 = scf.index_switch %19 -> memref<10xi32> +// CHECK: %24 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %25 = arith.index_cast %24 : i32 to index +// CHECK: %26 = scf.index_switch %25 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } @@ -95,8 +98,9 @@ // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } // CHECK: aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 1) -// CHECK: %21 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %22 = scf.index_switch %21 -> memref<10xi32> +// CHECK: %27 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %28 = arith.index_cast %27 : i32 to index +// CHECK: %29 = scf.index_switch %28 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } @@ -109,8 +113,9 @@ // CHECK: default { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } -// CHECK: %23 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %24 = scf.index_switch %23 -> memref<10xi32> +// CHECK: %30 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %31 = arith.index_cast %30 : i32 to index +// CHECK: %32 = scf.index_switch %31 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %input_fifo_cons_buff_1 : memref<10xi32> // CHECK: } @@ -123,23 +128,24 @@ // CHECK: default { // CHECK: scf.yield %input_fifo_cons_buff_1 : memref<10xi32> // CHECK: } -// CHECK: func.call @add_10_i32(%22, %24, %20) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () +// CHECK: func.call @add_10_i32(%29, %32, %26) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () // CHECK: aie.use_lock(%input_fifo_cons_prod_lock, Release, 1) -// CHECK: %25 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %c1_6 = arith.constant 1 : index -// CHECK: %26 = arith.addi %25, %c1_6 : index -// CHECK: %27 = arith.remsi %26, %c3 : index -// CHECK: memref.store %27, %buffer_0_2[%c1] : memref<2xindex> +// CHECK: %33 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %c1_i32_4 = arith.constant 1 : i32 +// CHECK: %34 = arith.addi %33, %c1_i32_4 : i32 +// CHECK: %35 = arith.remsi %34, %c3_i32 : i32 +// CHECK: memref.store %35, %buffer_0_2[%c1] : memref<2xi32> // CHECK: aie.use_lock(%output_fifo_cons_lock, Release, 1) -// CHECK: %28 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %c1_7 = arith.constant 1 : index -// CHECK: %29 = arith.addi %28, %c1_7 : index -// CHECK: %30 = arith.remsi %29, %c2 : index -// CHECK: memref.store %30, %buffer_0_2[%c0_0] : memref<2xindex> +// CHECK: %36 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %c1_i32_5 = arith.constant 1 : i32 +// CHECK: %37 = arith.addi %36, %c1_i32_5 : i32 +// CHECK: %38 = arith.remsi %37, %c2_i32 : i32 +// CHECK: memref.store %38, %buffer_0_2[%c0] : memref<2xi32> // CHECK: } // CHECK: aie.use_lock(%output_fifo_prod_lock, AcquireGreaterEqual, 1) -// CHECK: %7 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %8 = scf.index_switch %7 -> memref<10xi32> +// CHECK: %9 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %10 = arith.index_cast %9 : i32 to index +// CHECK: %11 = scf.index_switch %10 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } @@ -150,8 +156,9 @@ // CHECK: scf.yield %output_fifo_buff_0 : memref<10xi32> // CHECK: } // CHECK: aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 1) -// CHECK: %9 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %10 = scf.index_switch %9 -> memref<10xi32> +// CHECK: %12 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %13 = arith.index_cast %12 : i32 to index +// CHECK: %14 = scf.index_switch %13 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } @@ -164,8 +171,9 @@ // CHECK: default { // CHECK: scf.yield %input_fifo_cons_buff_0 : memref<10xi32> // CHECK: } -// CHECK: %11 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %12 = scf.index_switch %11 -> memref<10xi32> +// CHECK: %15 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %16 = arith.index_cast %15 : i32 to index +// CHECK: %17 = scf.index_switch %16 -> memref<10xi32> // CHECK: case 0 { // CHECK: scf.yield %input_fifo_cons_buff_1 : memref<10xi32> // CHECK: } @@ -178,19 +186,19 @@ // CHECK: default { // CHECK: scf.yield %input_fifo_cons_buff_1 : memref<10xi32> // CHECK: } -// CHECK: func.call @add_10_i32(%10, %12, %8) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () +// CHECK: func.call @add_10_i32(%14, %17, %11) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () // CHECK: aie.use_lock(%input_fifo_cons_prod_lock, Release, 2) -// CHECK: %13 = memref.load %buffer_0_2[%c1] : memref<2xindex> -// CHECK: %c2_4 = arith.constant 2 : index -// CHECK: %14 = arith.addi %13, %c2_4 : index -// CHECK: %15 = arith.remsi %14, %c3 : index -// CHECK: memref.store %15, %buffer_0_2[%c1] : memref<2xindex> +// CHECK: %18 = memref.load %buffer_0_2[%c1] : memref<2xi32> +// CHECK: %c2_i32_2 = arith.constant 2 : i32 +// CHECK: %19 = arith.addi %18, %c2_i32_2 : i32 +// CHECK: %20 = arith.remsi %19, %c3_i32 : i32 +// CHECK: memref.store %20, %buffer_0_2[%c1] : memref<2xi32> // CHECK: aie.use_lock(%output_fifo_cons_lock, Release, 1) -// CHECK: %16 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> -// CHECK: %c1_5 = arith.constant 1 : index -// CHECK: %17 = arith.addi %16, %c1_5 : index -// CHECK: %18 = arith.remsi %17, %c2 : index -// CHECK: memref.store %18, %buffer_0_2[%c0_0] : memref<2xindex> +// CHECK: %21 = memref.load %buffer_0_2[%c0] : memref<2xi32> +// CHECK: %c1_i32_3 = arith.constant 1 : i32 +// CHECK: %22 = arith.addi %21, %c1_i32_3 : i32 +// CHECK: %23 = arith.remsi %22, %c2_i32 : i32 +// CHECK: memref.store %23, %buffer_0_2[%c0] : memref<2xi32> // CHECK: aie.end // CHECK: } // CHECK: aie.shim_dma_allocation @input_fifo(MM2S, 0, 0) From bf8ef6631bbe1ddc1ee53d8bc5f299625c5f370c Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 14:45:36 -0700 Subject: [PATCH 39/46] Got the flags wrong in the previous one --- .../dynamic_lowering_flag_test.mlir | 96 ++----------------- 1 file changed, 8 insertions(+), 88 deletions(-) diff --git a/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir b/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir index bdc59cf3a4..16c028b6c3 100644 --- a/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir +++ b/test/objectFifo-stateful-transform/dynamic_lowering_flag_test.mlir @@ -10,50 +10,6 @@ // RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK: aie.device(npu1_1col) { -// CHECK: memref.global "public" @output_fifo2_cons : memref<10xi32> -// CHECK: memref.global "public" @output_fifo2 : memref<10xi32> -// CHECK: memref.global "public" @input_fifo2_cons : memref<10xi32> -// CHECK: memref.global "public" @input_fifo2 : memref<10xi32> -// CHECK: memref.global "public" @output_fifo_cons : memref<10xi32> -// CHECK: memref.global "public" @output_fifo : memref<10xi32> -// CHECK: memref.global "public" @input_fifo_cons : memref<10xi32> -// CHECK: memref.global "public" @input_fifo : memref<10xi32> -// CHECK: func.func @passthrough_10_i32(%arg0: memref<10xi32>, %arg1: memref<10xi32>) { -// CHECK: return -// CHECK: } -// CHECK: %tile_0_0 = aie.tile(0, 0) -// CHECK: %tile_0_2 = aie.tile(0, 2) -// CHECK: %tile_0_4 = aie.tile(0, 4) -// CHECK: %output_fifo2_cons_prod_lock = aie.lock(%tile_0_0, 6) {init = 1 : i32, sym_name = "output_fifo2_cons_prod_lock"} -// CHECK: %output_fifo2_cons_cons_lock = aie.lock(%tile_0_0, 7) {init = 0 : i32, sym_name = "output_fifo2_cons_cons_lock"} -// CHECK: %output_fifo2_buff_0 = aie.buffer(%tile_0_4) {sym_name = "output_fifo2_buff_0"} : memref<10xi32> -// CHECK: %output_fifo2_buff_1 = aie.buffer(%tile_0_4) {sym_name = "output_fifo2_buff_1"} : memref<10xi32> -// CHECK: %output_fifo2_prod_lock = aie.lock(%tile_0_4, 2) {init = 2 : i32, sym_name = "output_fifo2_prod_lock"} -// CHECK: %output_fifo2_cons_lock = aie.lock(%tile_0_4, 3) {init = 0 : i32, sym_name = "output_fifo2_cons_lock"} -// CHECK: %input_fifo2_cons_buff_0 = aie.buffer(%tile_0_4) {sym_name = "input_fifo2_cons_buff_0"} : memref<10xi32> -// CHECK: %input_fifo2_cons_buff_1 = aie.buffer(%tile_0_4) {sym_name = "input_fifo2_cons_buff_1"} : memref<10xi32> -// CHECK: %input_fifo2_cons_prod_lock = aie.lock(%tile_0_4, 0) {init = 2 : i32, sym_name = "input_fifo2_cons_prod_lock"} -// CHECK: %input_fifo2_cons_cons_lock = aie.lock(%tile_0_4, 1) {init = 0 : i32, sym_name = "input_fifo2_cons_cons_lock"} -// CHECK: %input_fifo2_prod_lock = aie.lock(%tile_0_0, 4) {init = 1 : i32, sym_name = "input_fifo2_prod_lock"} -// CHECK: %input_fifo2_cons_lock = aie.lock(%tile_0_0, 5) {init = 0 : i32, sym_name = "input_fifo2_cons_lock"} -// CHECK: %output_fifo_cons_prod_lock = aie.lock(%tile_0_0, 2) {init = 1 : i32, sym_name = "output_fifo_cons_prod_lock"} -// CHECK: %output_fifo_cons_cons_lock = aie.lock(%tile_0_0, 3) {init = 0 : i32, sym_name = "output_fifo_cons_cons_lock"} -// CHECK: %output_fifo_buff_0 = aie.buffer(%tile_0_2) {sym_name = "output_fifo_buff_0"} : memref<10xi32> -// CHECK: %output_fifo_buff_1 = aie.buffer(%tile_0_2) {sym_name = "output_fifo_buff_1"} : memref<10xi32> -// CHECK: %output_fifo_prod_lock = aie.lock(%tile_0_2, 2) {init = 2 : i32, sym_name = "output_fifo_prod_lock"} -// CHECK: %output_fifo_cons_lock = aie.lock(%tile_0_2, 3) {init = 0 : i32, sym_name = "output_fifo_cons_lock"} -// CHECK: %input_fifo_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "input_fifo_cons_buff_0"} : memref<10xi32> -// CHECK: %input_fifo_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "input_fifo_cons_buff_1"} : memref<10xi32> -// CHECK: %input_fifo_cons_prod_lock = aie.lock(%tile_0_2, 0) {init = 2 : i32, sym_name = "input_fifo_cons_prod_lock"} -// CHECK: %input_fifo_cons_cons_lock = aie.lock(%tile_0_2, 1) {init = 0 : i32, sym_name = "input_fifo_cons_cons_lock"} -// CHECK: %input_fifo_prod_lock = aie.lock(%tile_0_0, 0) {init = 1 : i32, sym_name = "input_fifo_prod_lock"} -// CHECK: %input_fifo_cons_lock = aie.lock(%tile_0_0, 1) {init = 0 : i32, sym_name = "input_fifo_cons_lock"} -// CHECK: aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0) -// CHECK: aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0) -// CHECK: aie.flow(%tile_0_0, DMA : 1, %tile_0_4, DMA : 0) -// CHECK: aie.flow(%tile_0_4, DMA : 0, %tile_0_0, DMA : 1) -// CHECK: %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xi32> // CHECK: %core_0_2 = aie.core(%tile_0_2) { // CHECK: %c0_i32 = arith.constant 0 : i32 // CHECK: %c0 = arith.constant 0 : index @@ -108,58 +64,22 @@ // CHECK: } // CHECK: aie.end // CHECK: } {dynamic_objfifo_lowering = true} -// CHECK: %buffer_0_4 = aie.buffer(%tile_0_4) : memref<2xi32> // CHECK: %core_0_4 = aie.core(%tile_0_4) { -// CHECK: %c0_i32 = arith.constant 0 : i32 // CHECK: %c0 = arith.constant 0 : index -// CHECK: %c2_i32 = arith.constant 2 : i32 -// CHECK: memref.store %c0_i32, %buffer_0_4[%c0] : memref<2xi32> // CHECK: %c1 = arith.constant 1 : index -// CHECK: %c2_i32_0 = arith.constant 2 : i32 -// CHECK: memref.store %c0_i32, %buffer_0_4[%c1] : memref<2xi32> -// CHECK: %c0_1 = arith.constant 0 : index -// CHECK: %c1_2 = arith.constant 1 : index // CHECK: %c10 = arith.constant 10 : index -// CHECK: scf.for %arg0 = %c0_1 to %c10 step %c1_2 { +// CHECK: %c2 = arith.constant 2 : index +// CHECK: scf.for %arg0 = %c0 to %c10 step %c2 { // CHECK: aie.use_lock(%output_fifo2_prod_lock, AcquireGreaterEqual, 1) -// CHECK: %0 = memref.load %buffer_0_4[%c0] : memref<2xi32> -// CHECK: %1 = arith.index_cast %0 : i32 to index -// CHECK: %2 = scf.index_switch %1 -> memref<10xi32> -// CHECK: case 0 { -// CHECK: scf.yield %output_fifo2_buff_0 : memref<10xi32> -// CHECK: } -// CHECK: case 1 { -// CHECK: scf.yield %output_fifo2_buff_1 : memref<10xi32> -// CHECK: } -// CHECK: default { -// CHECK: scf.yield %output_fifo2_buff_0 : memref<10xi32> -// CHECK: } // CHECK: aie.use_lock(%input_fifo2_cons_cons_lock, AcquireGreaterEqual, 1) -// CHECK: %3 = memref.load %buffer_0_4[%c1] : memref<2xi32> -// CHECK: %4 = arith.index_cast %3 : i32 to index -// CHECK: %5 = scf.index_switch %4 -> memref<10xi32> -// CHECK: case 0 { -// CHECK: scf.yield %input_fifo2_cons_buff_0 : memref<10xi32> -// CHECK: } -// CHECK: case 1 { -// CHECK: scf.yield %input_fifo2_cons_buff_1 : memref<10xi32> -// CHECK: } -// CHECK: default { -// CHECK: scf.yield %input_fifo2_cons_buff_0 : memref<10xi32> -// CHECK: } -// CHECK: func.call @passthrough_10_i32(%5, %2) : (memref<10xi32>, memref<10xi32>) -> () +// CHECK: func.call @passthrough_10_i32(%input_fifo2_cons_buff_0, %output_fifo2_buff_0) : (memref<10xi32>, memref<10xi32>) -> () +// CHECK: aie.use_lock(%input_fifo2_cons_prod_lock, Release, 1) +// CHECK: aie.use_lock(%output_fifo2_cons_lock, Release, 1) +// CHECK: aie.use_lock(%output_fifo2_prod_lock, AcquireGreaterEqual, 1) +// CHECK: aie.use_lock(%input_fifo2_cons_cons_lock, AcquireGreaterEqual, 1) +// CHECK: func.call @passthrough_10_i32(%input_fifo2_cons_buff_1, %output_fifo2_buff_1) : (memref<10xi32>, memref<10xi32>) -> () // CHECK: aie.use_lock(%input_fifo2_cons_prod_lock, Release, 1) -// CHECK: %6 = memref.load %buffer_0_4[%c1] : memref<2xi32> -// CHECK: %c1_i32 = arith.constant 1 : i32 -// CHECK: %7 = arith.addi %6, %c1_i32 : i32 -// CHECK: %8 = arith.remsi %7, %c2_i32_0 : i32 -// CHECK: memref.store %8, %buffer_0_4[%c1] : memref<2xi32> // CHECK: aie.use_lock(%output_fifo2_cons_lock, Release, 1) -// CHECK: %9 = memref.load %buffer_0_4[%c0] : memref<2xi32> -// CHECK: %c1_i32_3 = arith.constant 1 : i32 -// CHECK: %10 = arith.addi %9, %c1_i32_3 : i32 -// CHECK: %11 = arith.remsi %10, %c2_i32 : i32 -// CHECK: memref.store %11, %buffer_0_4[%c0] : memref<2xi32> // CHECK: } // CHECK: aie.end // CHECK: } From efa97dc3a4d5f88318343739d692663ecb190832 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 4 Dec 2024 14:51:06 -0700 Subject: [PATCH 40/46] Reverting changes from test.cpp for failing tests --- .../sliding_window/test.cpp | 22 +++++-------------- .../sliding_window_conditional/test.cpp | 2 +- .../two_core_sliding_window/test.cpp | 2 +- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp index 5c78b0e986..0fb9cfa7d4 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "build/final.xclbin" +#define XCLBIN "final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" +#define INSTS_TXT "insts.txt" #endif #ifndef KERNEL_NAME @@ -28,27 +28,15 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes + #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} +#include "test_utils.h" int main(int argc, const char *argv[]) { - std::vector instr_v = load_instr_sequence(INSTS_TXT); + std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); assert(instr_v.size() > 0); // Get a device handle diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp index c25d9358f6..0fb9cfa7d4 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp @@ -28,7 +28,7 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes -#define WIDTH 10 + #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp index c25d9358f6..0fb9cfa7d4 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp @@ -28,7 +28,7 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes -#define WIDTH 10 + #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE From 946fb84bdfbf50c53be5e230bfa767ce0d043da2 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Thu, 5 Dec 2024 09:52:21 -0700 Subject: [PATCH 41/46] MLIR version for sliding_window_conditional since python file fails --- .../sliding_window_conditional/aie.mlir | 202 ++++++++++++++++++ .../sliding_window_conditional/aie2.py | 3 +- .../sliding_window_conditional/run.lit | 10 + 3 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir create mode 100644 test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir new file mode 100644 index 0000000000..cfe608eed0 --- /dev/null +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir @@ -0,0 +1,202 @@ +//===- aie.mlir ------------------------------------------------*- MLIR -*-===// +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. +// SPDX-License-Identifier: MIT +// +//===----------------------------------------------------------------------===// + +module { + aie.device(npu1_1col) { + memref.global "public" @output_fifo_cons : memref<10xi32> + memref.global "public" @output_fifo : memref<10xi32> + memref.global "public" @input_fifo_cons : memref<10xi32> + memref.global "public" @input_fifo : memref<10xi32> + func.func private @add_10_i32(memref<10xi32>, memref<10xi32>, memref<10xi32>) + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + %output_fifo_cons_prod_lock = aie.lock(%tile_0_0, 2) {init = 0 : i32, sym_name = "output_fifo_cons_prod_lock"} + %output_fifo_cons_cons_lock = aie.lock(%tile_0_0, 3) {init = 0 : i32, sym_name = "output_fifo_cons_cons_lock"} + %output_fifo_buff_0 = aie.buffer(%tile_0_2) {sym_name = "output_fifo_buff_0"} : memref<10xi32> + %output_fifo_buff_1 = aie.buffer(%tile_0_2) {sym_name = "output_fifo_buff_1"} : memref<10xi32> + %output_fifo_prod_lock = aie.lock(%tile_0_2, 2) {init = 2 : i32, sym_name = "output_fifo_prod_lock"} + %output_fifo_cons_lock = aie.lock(%tile_0_2, 3) {init = 0 : i32, sym_name = "output_fifo_cons_lock"} + %input_fifo_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "input_fifo_cons_buff_0"} : memref<10xi32> + %input_fifo_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "input_fifo_cons_buff_1"} : memref<10xi32> + %input_fifo_cons_buff_2 = aie.buffer(%tile_0_2) {sym_name = "input_fifo_cons_buff_2"} : memref<10xi32> + %input_fifo_cons_prod_lock = aie.lock(%tile_0_2, 0) {init = 3 : i32, sym_name = "input_fifo_cons_prod_lock"} + %input_fifo_cons_cons_lock = aie.lock(%tile_0_2, 1) {init = 0 : i32, sym_name = "input_fifo_cons_cons_lock"} + %input_fifo_prod_lock = aie.lock(%tile_0_0, 0) {init = 0 : i32, sym_name = "input_fifo_prod_lock"} + %input_fifo_cons_lock = aie.lock(%tile_0_0, 1) {init = 0 : i32, sym_name = "input_fifo_cons_lock"} + aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0) + aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0) + %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xindex> + %core_0_2 = aie.core(%tile_0_2) { + %c0 = arith.constant 0 : index + %c0_0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + memref.store %c0, %buffer_0_2[%c0_0] : memref<2xindex> + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + memref.store %c0, %buffer_0_2[%c1] : memref<2xindex> + %c0_1 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %c1_2 = arith.constant 1 : index + scf.for %arg0 = %c0_1 to %c10 step %c1_2 { + aie.use_lock(%output_fifo_prod_lock, AcquireGreaterEqual, 1) + %0 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> + %1 = scf.index_switch %0 -> memref<10xi32> + case 0 { + scf.yield %output_fifo_buff_0 : memref<10xi32> + } + case 1 { + scf.yield %output_fifo_buff_1 : memref<10xi32> + } + default { + scf.yield %output_fifo_buff_0 : memref<10xi32> + } + %2 = arith.cmpi eq, %arg0, %c0_1 : index + %3 = arith.subi %c10, %c1_2 : index + %4 = arith.cmpi eq, %arg0, %3 : index + scf.if %2 { + aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 1) + %8 = memref.load %buffer_0_2[%c1] : memref<2xindex> + %9 = scf.index_switch %8 -> memref<10xi32> + case 0 { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + case 1 { + scf.yield %input_fifo_cons_buff_1 : memref<10xi32> + } + case 2 { + scf.yield %input_fifo_cons_buff_2 : memref<10xi32> + } + default { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + func.call @add_10_i32(%9, %9, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () + } else { + scf.if %4 { + aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 2) + %8 = memref.load %buffer_0_2[%c1] : memref<2xindex> + %9 = scf.index_switch %8 -> memref<10xi32> + case 0 { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + case 1 { + scf.yield %input_fifo_cons_buff_1 : memref<10xi32> + } + case 2 { + scf.yield %input_fifo_cons_buff_2 : memref<10xi32> + } + default { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + %10 = memref.load %buffer_0_2[%c1] : memref<2xindex> + %11 = scf.index_switch %10 -> memref<10xi32> + case 0 { + scf.yield %input_fifo_cons_buff_1 : memref<10xi32> + } + case 1 { + scf.yield %input_fifo_cons_buff_2 : memref<10xi32> + } + case 2 { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + default { + scf.yield %input_fifo_cons_buff_1 : memref<10xi32> + } + func.call @add_10_i32(%9, %11, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () + aie.use_lock(%input_fifo_cons_prod_lock, Release, 2) + %12 = memref.load %buffer_0_2[%c1] : memref<2xindex> + %c2_4 = arith.constant 2 : index + %13 = arith.addi %12, %c2_4 : index + %14 = arith.remsi %13, %c3 : index + memref.store %14, %buffer_0_2[%c1] : memref<2xindex> + } else { + %8 = memref.load %buffer_0_2[%c1] : memref<2xindex> + %9 = scf.index_switch %8 -> memref<10xi32> + case 0 { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + case 1 { + scf.yield %input_fifo_cons_buff_1 : memref<10xi32> + } + case 2 { + scf.yield %input_fifo_cons_buff_2 : memref<10xi32> + } + default { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + %10 = memref.load %buffer_0_2[%c1] : memref<2xindex> + %11 = scf.index_switch %10 -> memref<10xi32> + case 0 { + scf.yield %input_fifo_cons_buff_1 : memref<10xi32> + } + case 1 { + scf.yield %input_fifo_cons_buff_2 : memref<10xi32> + } + case 2 { + scf.yield %input_fifo_cons_buff_0 : memref<10xi32> + } + default { + scf.yield %input_fifo_cons_buff_1 : memref<10xi32> + } + func.call @add_10_i32(%9, %11, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () + aie.use_lock(%input_fifo_cons_prod_lock, Release, 1) + %12 = memref.load %buffer_0_2[%c1] : memref<2xindex> + %c1_4 = arith.constant 1 : index + %13 = arith.addi %12, %c1_4 : index + %14 = arith.remsi %13, %c3 : index + memref.store %14, %buffer_0_2[%c1] : memref<2xindex> + } + } + aie.use_lock(%output_fifo_cons_lock, Release, 1) + %5 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> + %c1_3 = arith.constant 1 : index + %6 = arith.addi %5, %c1_3 : index + %7 = arith.remsi %6, %c2 : index + memref.store %7, %buffer_0_2[%c0_0] : memref<2xindex> + } + aie.end + } {link_with = "kernel.o"} + aie.shim_dma_allocation @input_fifo(MM2S, 0, 0) + aiex.runtime_sequence(%arg0: memref<10xi32>, %arg1: memref<10xi32>) { + aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 100][0, 0, 0, 1]) {id = 0 : i64, metadata = @input_fifo} : memref<10xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 100][0, 0, 0, 1]) {id = 2 : i64, metadata = @output_fifo} : memref<10xi32> + aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + } + aie.shim_dma_allocation @output_fifo(S2MM, 0, 0) + %mem_0_2 = aie.mem(%tile_0_2) { + %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb4) + ^bb1: // 2 preds: ^bb0, ^bb3 + aie.use_lock(%input_fifo_cons_prod_lock, AcquireGreaterEqual, 1) + aie.dma_bd(%input_fifo_cons_buff_0 : memref<10xi32>, 0, 10) + aie.use_lock(%input_fifo_cons_cons_lock, Release, 1) + aie.next_bd ^bb2 + ^bb2: // pred: ^bb1 + aie.use_lock(%input_fifo_cons_prod_lock, AcquireGreaterEqual, 1) + aie.dma_bd(%input_fifo_cons_buff_1 : memref<10xi32>, 0, 10) + aie.use_lock(%input_fifo_cons_cons_lock, Release, 1) + aie.next_bd ^bb3 + ^bb3: // pred: ^bb2 + aie.use_lock(%input_fifo_cons_prod_lock, AcquireGreaterEqual, 1) + aie.dma_bd(%input_fifo_cons_buff_2 : memref<10xi32>, 0, 10) + aie.use_lock(%input_fifo_cons_cons_lock, Release, 1) + aie.next_bd ^bb1 + ^bb4: // pred: ^bb0 + %1 = aie.dma_start(MM2S, 0, ^bb5, ^bb7) + ^bb5: // 2 preds: ^bb4, ^bb6 + aie.use_lock(%output_fifo_cons_lock, AcquireGreaterEqual, 1) + aie.dma_bd(%output_fifo_buff_0 : memref<10xi32>, 0, 10) + aie.use_lock(%output_fifo_prod_lock, Release, 1) + aie.next_bd ^bb6 + ^bb6: // pred: ^bb5 + aie.use_lock(%output_fifo_cons_lock, AcquireGreaterEqual, 1) + aie.dma_bd(%output_fifo_buff_1 : memref<10xi32>, 0, 10) + aie.use_lock(%output_fifo_prod_lock, Release, 1) + aie.next_bd ^bb5 + ^bb7: // pred: ^bb4 + aie.end + } + } +} diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index 366552907b..fc12961596 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -9,10 +9,11 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! + import numpy as np from aie.dialects.aie import * diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit new file mode 100644 index 0000000000..51904bb335 --- /dev/null +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, valid_xchess_license +// +// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o +// RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt %S/aie.mlir +// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// CHECK: PASS! From 012d492ce3f95daeb6fd4dc4b800549bc2af69dc Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Thu, 5 Dec 2024 10:13:04 -0700 Subject: [PATCH 42/46] Use only mlir for testing --- .../sliding_window_conditional/aie2.py | 8 -------- .../sliding_window_conditional/run.lit | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index fc12961596..b4417d2a2e 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -5,14 +5,6 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, valid_xchess_license -# -# RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o -# RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s -# CHECK: PASS! import numpy as np diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit index 51904bb335..68033f00d2 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit @@ -4,7 +4,7 @@ // REQUIRES: ryzen_ai, valid_xchess_license // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o -// RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt %S/aie.mlir +// RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --dynamic-objFifos --xclbin-name=aie.xclbin --npu-insts-name=insts.txt %S/aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags // RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! From 1052567abeef9681138c0cc7271386aa517e7602 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Thu, 5 Dec 2024 13:46:40 -0700 Subject: [PATCH 43/46] Correct MLIR code --- .../sliding_window_conditional/aie.mlir | 79 +++++++++---------- .../sliding_window_conditional/aie2.py | 73 ----------------- 2 files changed, 39 insertions(+), 113 deletions(-) delete mode 100644 test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir index cfe608eed0..5ee3d1aa0d 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir @@ -1,10 +1,3 @@ -//===- aie.mlir ------------------------------------------------*- MLIR -*-===// -// -// Copyright (C) 2024, Advanced Micro Devices, Inc. -// SPDX-License-Identifier: MIT -// -//===----------------------------------------------------------------------===// - module { aie.device(npu1_1col) { memref.global "public" @output_fifo_cons : memref<10xi32> @@ -29,22 +22,23 @@ module { %input_fifo_cons_lock = aie.lock(%tile_0_0, 1) {init = 0 : i32, sym_name = "input_fifo_cons_lock"} aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0) aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0) - %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xindex> + %buffer_0_2 = aie.buffer(%tile_0_2) : memref<2xi32> %core_0_2 = aie.core(%tile_0_2) { - %c0 = arith.constant 0 : index + %c0_i32 = arith.constant 0 : i32 %c0_0 = arith.constant 0 : index - %c2 = arith.constant 2 : index - memref.store %c0, %buffer_0_2[%c0_0] : memref<2xindex> + %c2_i32 = arith.constant 2 : i32 + memref.store %c0_i32, %buffer_0_2[%c0_0] : memref<2xi32> %c1 = arith.constant 1 : index - %c3 = arith.constant 3 : index - memref.store %c0, %buffer_0_2[%c1] : memref<2xindex> + %c3_i32 = arith.constant 3 : i32 + memref.store %c0_i32, %buffer_0_2[%c1] : memref<2xi32> %c0_1 = arith.constant 0 : index %c10 = arith.constant 10 : index %c1_2 = arith.constant 1 : index scf.for %arg0 = %c0_1 to %c10 step %c1_2 { aie.use_lock(%output_fifo_prod_lock, AcquireGreaterEqual, 1) - %0 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> - %1 = scf.index_switch %0 -> memref<10xi32> + %0 = memref.load %buffer_0_2[%c0_0] : memref<2xi32> + %100 = arith.index_cast %0 : i32 to index + %1 = scf.index_switch %100 -> memref<10xi32> case 0 { scf.yield %output_fifo_buff_0 : memref<10xi32> } @@ -59,8 +53,9 @@ module { %4 = arith.cmpi eq, %arg0, %3 : index scf.if %2 { aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 1) - %8 = memref.load %buffer_0_2[%c1] : memref<2xindex> - %9 = scf.index_switch %8 -> memref<10xi32> + %8 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %800 = arith.index_cast %8 : i32 to index + %9 = scf.index_switch %800 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } @@ -77,8 +72,9 @@ module { } else { scf.if %4 { aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 2) - %8 = memref.load %buffer_0_2[%c1] : memref<2xindex> - %9 = scf.index_switch %8 -> memref<10xi32> + %8 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %800 = arith.index_cast %8 : i32 to index + %9 = scf.index_switch %800 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } @@ -91,8 +87,9 @@ module { default { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } - %10 = memref.load %buffer_0_2[%c1] : memref<2xindex> - %11 = scf.index_switch %10 -> memref<10xi32> + %10 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %1000 = arith.index_cast %10 : i32 to index + %11 = scf.index_switch %1000 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_1 : memref<10xi32> } @@ -107,14 +104,15 @@ module { } func.call @add_10_i32(%9, %11, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () aie.use_lock(%input_fifo_cons_prod_lock, Release, 2) - %12 = memref.load %buffer_0_2[%c1] : memref<2xindex> - %c2_4 = arith.constant 2 : index - %13 = arith.addi %12, %c2_4 : index - %14 = arith.remsi %13, %c3 : index - memref.store %14, %buffer_0_2[%c1] : memref<2xindex> + %12 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %c2_4 = arith.constant 2 : i32 + %13 = arith.addi %12, %c2_4 : i32 + %14 = arith.remsi %13, %c3_i32 : i32 + memref.store %14, %buffer_0_2[%c1] : memref<2xi32> } else { - %8 = memref.load %buffer_0_2[%c1] : memref<2xindex> - %9 = scf.index_switch %8 -> memref<10xi32> + %8 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %800 = arith.index_cast %8 : i32 to index + %9 = scf.index_switch %800 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } @@ -127,8 +125,9 @@ module { default { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } - %10 = memref.load %buffer_0_2[%c1] : memref<2xindex> - %11 = scf.index_switch %10 -> memref<10xi32> + %10 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %1000 = arith.index_cast %10 : i32 to index + %11 = scf.index_switch %1000 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_1 : memref<10xi32> } @@ -143,19 +142,19 @@ module { } func.call @add_10_i32(%9, %11, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () aie.use_lock(%input_fifo_cons_prod_lock, Release, 1) - %12 = memref.load %buffer_0_2[%c1] : memref<2xindex> - %c1_4 = arith.constant 1 : index - %13 = arith.addi %12, %c1_4 : index - %14 = arith.remsi %13, %c3 : index - memref.store %14, %buffer_0_2[%c1] : memref<2xindex> + %12 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %c1_4 = arith.constant 1 : i32 + %13 = arith.addi %12, %c1_4 : i32 + %14 = arith.remsi %13, %c3_i32 : i32 + memref.store %14, %buffer_0_2[%c1] : memref<2xi32> } } aie.use_lock(%output_fifo_cons_lock, Release, 1) - %5 = memref.load %buffer_0_2[%c0_0] : memref<2xindex> - %c1_3 = arith.constant 1 : index - %6 = arith.addi %5, %c1_3 : index - %7 = arith.remsi %6, %c2 : index - memref.store %7, %buffer_0_2[%c0_0] : memref<2xindex> + %5 = memref.load %buffer_0_2[%c0_0] : memref<2xi32> + %c1_3 = arith.constant 1 : i32 + %6 = arith.addi %5, %c1_3 : i32 + %7 = arith.remsi %6, %c2_i32 : i32 + memref.store %7, %buffer_0_2[%c0_0] : memref<2xi32> } aie.end } {link_with = "kernel.o"} diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py deleted file mode 100644 index b4417d2a2e..0000000000 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ /dev/null @@ -1,73 +0,0 @@ -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 AMD Inc. - - -import numpy as np - -from aie.dialects.aie import * -from aie.dialects.aiex import * -from aie.helpers.dialects.ext.scf import _for as range_ -from aie.extras.context import mlir_mod_ctx - -N = 100 -n_rows = 10 -dev = AIEDevice.npu1_1col -col = 0 - - -def sliding_window(): - with mlir_mod_ctx() as ctx: - - @device(dev) - def device_body(): - subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] - - # Tile declarations - ShimTile = tile(col, 0) - ComputeTile = tile(col, 2) - - # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty) - - # AIE Core Function declarations - add_10_i32 = external_func( - "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] - ) - - # Set up compute tiles - @core(ComputeTile, "kernel.o") - def core_body(): - for i in range_(10): - elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - if i == 0: - elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) - add_10_i32(elemInPre, elemInPre, elemOut) - elif i == 9: - elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) - add_10_i32(elemsInPost[0], elemsInPost[1], elemOut) - of_in.release(ObjectFifoPort.Consume, 2) - else: - elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - add_10_i32(elemsIn[0], elemsIn[1], elemOut) - of_in.release(ObjectFifoPort.Consume, 1) - - of_out.release(ObjectFifoPort.Produce, 1) - - # To/from AIE-array data movement - tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] - - @runtime_sequence(tensor_ty, tensor_ty) - def sequence(A, C): - npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) - dma_wait(of_out) - - print(ctx.module) - - -sliding_window() From 5a664acd2f3d7d4300403255b0c3767577ccf55f Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Thu, 5 Dec 2024 13:59:11 -0700 Subject: [PATCH 44/46] File name --- .../dynamic_object_fifo/sliding_window_conditional/run.lit | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit index 68033f00d2..6220c2ec10 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/run.lit @@ -4,7 +4,7 @@ // REQUIRES: ryzen_ai, valid_xchess_license // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o -// RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --dynamic-objFifos --xclbin-name=aie.xclbin --npu-insts-name=insts.txt %S/aie.mlir +// RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --dynamic-objFifos --xclbin-name=final.xclbin --npu-insts-name=insts.txt %S/aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! From 16477b91f3fc4569e077f3ef740e70031efaf132 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Thu, 5 Dec 2024 14:30:02 -0700 Subject: [PATCH 45/46] Renaming variables --- .../sliding_window_conditional/aie.mlir | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir index 5ee3d1aa0d..87197925b1 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie.mlir @@ -37,8 +37,8 @@ module { scf.for %arg0 = %c0_1 to %c10 step %c1_2 { aie.use_lock(%output_fifo_prod_lock, AcquireGreaterEqual, 1) %0 = memref.load %buffer_0_2[%c0_0] : memref<2xi32> - %100 = arith.index_cast %0 : i32 to index - %1 = scf.index_switch %100 -> memref<10xi32> + %1 = arith.index_cast %0 : i32 to index + %2 = scf.index_switch %1 -> memref<10xi32> case 0 { scf.yield %output_fifo_buff_0 : memref<10xi32> } @@ -48,14 +48,14 @@ module { default { scf.yield %output_fifo_buff_0 : memref<10xi32> } - %2 = arith.cmpi eq, %arg0, %c0_1 : index - %3 = arith.subi %c10, %c1_2 : index - %4 = arith.cmpi eq, %arg0, %3 : index - scf.if %2 { + %3 = arith.cmpi eq, %arg0, %c0_1 : index + %4 = arith.subi %c10, %c1_2 : index + %5 = arith.cmpi eq, %arg0, %4 : index + scf.if %3 { aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 1) %8 = memref.load %buffer_0_2[%c1] : memref<2xi32> - %800 = arith.index_cast %8 : i32 to index - %9 = scf.index_switch %800 -> memref<10xi32> + %9 = arith.index_cast %8 : i32 to index + %10 = scf.index_switch %9 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } @@ -68,13 +68,13 @@ module { default { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } - func.call @add_10_i32(%9, %9, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () + func.call @add_10_i32(%10, %10, %2) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () } else { - scf.if %4 { + scf.if %5 { aie.use_lock(%input_fifo_cons_cons_lock, AcquireGreaterEqual, 2) %8 = memref.load %buffer_0_2[%c1] : memref<2xi32> - %800 = arith.index_cast %8 : i32 to index - %9 = scf.index_switch %800 -> memref<10xi32> + %9 = arith.index_cast %8 : i32 to index + %10 = scf.index_switch %9 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } @@ -87,9 +87,9 @@ module { default { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } - %10 = memref.load %buffer_0_2[%c1] : memref<2xi32> - %1000 = arith.index_cast %10 : i32 to index - %11 = scf.index_switch %1000 -> memref<10xi32> + %11 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %12 = arith.index_cast %11 : i32 to index + %13 = scf.index_switch %12 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_1 : memref<10xi32> } @@ -102,17 +102,17 @@ module { default { scf.yield %input_fifo_cons_buff_1 : memref<10xi32> } - func.call @add_10_i32(%9, %11, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () + func.call @add_10_i32(%10, %13, %2) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () aie.use_lock(%input_fifo_cons_prod_lock, Release, 2) - %12 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %14 = memref.load %buffer_0_2[%c1] : memref<2xi32> %c2_4 = arith.constant 2 : i32 - %13 = arith.addi %12, %c2_4 : i32 - %14 = arith.remsi %13, %c3_i32 : i32 - memref.store %14, %buffer_0_2[%c1] : memref<2xi32> + %15 = arith.addi %14, %c2_4 : i32 + %16 = arith.remsi %15, %c3_i32 : i32 + memref.store %16, %buffer_0_2[%c1] : memref<2xi32> } else { %8 = memref.load %buffer_0_2[%c1] : memref<2xi32> - %800 = arith.index_cast %8 : i32 to index - %9 = scf.index_switch %800 -> memref<10xi32> + %9 = arith.index_cast %8 : i32 to index + %10 = scf.index_switch %9 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } @@ -125,9 +125,9 @@ module { default { scf.yield %input_fifo_cons_buff_0 : memref<10xi32> } - %10 = memref.load %buffer_0_2[%c1] : memref<2xi32> - %1000 = arith.index_cast %10 : i32 to index - %11 = scf.index_switch %1000 -> memref<10xi32> + %11 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %12 = arith.index_cast %11 : i32 to index + %13 = scf.index_switch %12 -> memref<10xi32> case 0 { scf.yield %input_fifo_cons_buff_1 : memref<10xi32> } @@ -140,21 +140,21 @@ module { default { scf.yield %input_fifo_cons_buff_1 : memref<10xi32> } - func.call @add_10_i32(%9, %11, %1) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () + func.call @add_10_i32(%10, %13, %2) : (memref<10xi32>, memref<10xi32>, memref<10xi32>) -> () aie.use_lock(%input_fifo_cons_prod_lock, Release, 1) - %12 = memref.load %buffer_0_2[%c1] : memref<2xi32> + %14 = memref.load %buffer_0_2[%c1] : memref<2xi32> %c1_4 = arith.constant 1 : i32 - %13 = arith.addi %12, %c1_4 : i32 - %14 = arith.remsi %13, %c3_i32 : i32 - memref.store %14, %buffer_0_2[%c1] : memref<2xi32> + %15 = arith.addi %14, %c1_4 : i32 + %16 = arith.remsi %15, %c3_i32 : i32 + memref.store %16, %buffer_0_2[%c1] : memref<2xi32> } } aie.use_lock(%output_fifo_cons_lock, Release, 1) - %5 = memref.load %buffer_0_2[%c0_0] : memref<2xi32> + %6 = memref.load %buffer_0_2[%c0_0] : memref<2xi32> %c1_3 = arith.constant 1 : i32 - %6 = arith.addi %5, %c1_3 : i32 - %7 = arith.remsi %6, %c2_i32 : i32 - memref.store %7, %buffer_0_2[%c0_0] : memref<2xi32> + %7 = arith.addi %6, %c1_3 : i32 + %8 = arith.remsi %7, %c2_i32 : i32 + memref.store %8, %buffer_0_2[%c0_0] : memref<2xi32> } aie.end } {link_with = "kernel.o"} From 229378efb54643e2d06f94d32cce00db1a908c3d Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Thu, 5 Dec 2024 14:41:52 -0700 Subject: [PATCH 46/46] Check: Does flag positions impact(?) --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/reduction/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 3 ++- .../dynamic_object_fifo/two_core_sliding_window/aie2.py | 3 ++- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index e9dc107466..7c2b664a65 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,7 +10,7 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --dynamic-objFifos --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir # RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py index 0a8c1112d8..19dc7c6e3c 100644 --- a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --dynamic-objFifos --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py index eb5440e4cd..4814d27dae 100644 --- a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --dynamic-objFifos --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 37222b8a78..129b69eae5 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -9,10 +9,11 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --dynamic-objFifos --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! + from aie.dialects.aie import * from aie.dialects.aiex import * from aie.helpers.dialects.ext.scf import _for as range_ diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index d0b0f53d36..a48d6149ba 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -9,10 +9,11 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --dynamic-objFifos --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! + import numpy as np from aie.dialects.aie import *