|
| 1 | +# ./python/air/backend/xrt_backend.py -*- Python -*- |
| 2 | +# |
| 3 | +# Copyright (C) 2024, Advanced Micro Devices, Inc. |
| 4 | +# SPDX-License-Identifier: MIT |
| 5 | + |
| 6 | +import air.ir |
| 7 | +import air.passmanager |
| 8 | + |
| 9 | +from .abc import AirBackend |
| 10 | + |
| 11 | +import air.compiler.util |
| 12 | +import air.compiler.aircc.main as aircc |
| 13 | + |
| 14 | +import numpy as np |
| 15 | +import pyxrt as xrt |
| 16 | + |
| 17 | + |
class XRTBackend(AirBackend):
    """Main entry-point for the XRT-based AIR backend.

    Args:
        verbose: if True, print the input module in `compile` and pass
            ``-v`` to aircc.
        xclbin: xclbin filename. Used as the aircc ``-o`` output in `compile`
            and as the file opened in `load`.
        kernel: kernel name to use. `load` selects the first kernel in the
            xclbin whose name contains this string.
        insts: instruction filename read by `load`.
    """

    def __init__(
        self,
        verbose=False,
        xclbin="air.xclbin",
        kernel="MLIR_AIE",
        insts="air.insts.txt",
    ):
        super().__init__()
        self.opts_xclbin = xclbin
        self.opts_kernel = kernel
        self.opts_insts = insts
        self.verbose = verbose
        # Start with every runtime handle set to None so the attributes exist
        # (and `unload`/`__del__` are safe) even if `load` is never called.
        self.unload()

    def __del__(self):
        self.unload()

    def compile(self, air_module: air.ir.Module, pipeline=None):
        """Compiles an AIR module for the NPU / XRT Runtime with aircc.

        The module is expected to be AIR dialect IR and is handed directly to
        `aircc.run` together with a fixed set of command line options
        (device ``npu1_4col``, output written to the configured xclbin name).

        Args:
            air_module: The MLIR module consisting of funcs in the AIR dialect.
            pipeline: aircc optimization pipeline to use. NOTE: this argument
                is accepted for interface compatibility but is currently not
                forwarded to aircc.
        Returns:
            The input `air_module`, unchanged. It can be passed to `load`.
        """

        with air.ir.Context():

            if self.verbose:
                print("AIR Module:")
                print(air_module)

            aircc_options = [
                "--device",
                "npu1_4col",
                "air.mlir",
                "-xchesscc",
                "-xbridge",
                "-o",
                self.opts_xclbin,
            ]

            if self.verbose:
                aircc_options.append("-v")

            aircc.run(air_module, aircc_options)

        return air_module

    def load(self, module):
        """Load a compiled artifact into the air runtime.

        The xclbin and instruction files named in the constructor are opened
        from disk; the `module` argument itself is not inspected.

        Args:
            module: The artifact returned by `compile` (currently unused).

        Returns: A callable that can be used to invoke the loaded module.
            The callable takes up to five numpy arrays. Each numpy array is
            assumed to be an input/output tensor. The callable returns a
            tuple of one-dimensional numpy arrays, one for each tensor, with
            the same dtype as the corresponding argument.

        Raises:
            RuntimeError: if no kernel whose name contains the configured
                kernel name is present in the xclbin.
        """

        # create the device, xclbin and context
        self.device = xrt.device(0)
        self.xclbin = xrt.xclbin(self.opts_xclbin)
        self.device.register_xclbin(self.xclbin)
        self.context = xrt.hw_context(self.device, self.xclbin.get_uuid())

        # find and load the kernel (first one whose name contains opts_kernel)
        matching_kernels = [
            k for k in self.xclbin.get_kernels() if self.opts_kernel in k.get_name()
        ]
        if not matching_kernels:
            # Raise instead of terminating the interpreter so that callers can
            # handle the failure; other errors are no longer masked.
            raise RuntimeError(
                f"Kernel '{self.opts_kernel}' not found in '{self.opts_xclbin}'"
            )
        self.kernel = xrt.kernel(self.context, matching_kernels[0].get_name())

        # load the instructions as a numpy array: one hexadecimal word per
        # line, blank / whitespace-only lines are skipped
        with open(self.opts_insts, "r") as f:
            instr_words = [int(line, 16) for line in f if line.strip()]
        self.instr_v = np.array(instr_words, dtype=np.uint32)

        self.bo_instr = xrt.bo(
            self.device,
            self.instr_v.nbytes,
            xrt.bo.cacheable,
            self.kernel.group_id(0),
        )
        self.bo_instr.write(self.instr_v, 0)

        # 1) create and sync the buffers
        # 2) invoke the kernel
        # 3) sync the buffers
        # 4) return the contents of the buffers
        def invoker(*args):

            # limit arg length to 5
            if len(args) > 5:
                raise ValueError("Too many arguments")
            sizes_in_bytes = [a.size * a.itemsize for a in args]
            # Kernel arguments 0 and 1 are the instruction buffer and its
            # length (see the kernel call below), so tensors start at index 2.
            bos = [
                xrt.bo(self.device, s, xrt.bo.host_only, self.kernel.group_id(i + 2))
                for i, s in enumerate(sizes_in_bytes)
            ]

            self.bo_instr.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE)
            for bo, a in zip(bos, args):
                bo.write(a, 0)
                bo.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE)

            h = self.kernel(self.bo_instr, len(self.instr_v), *bos)
            h.wait()

            for bo in bos:
                bo.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE)
            # Every buffer is read back (inputs and outputs alike) and
            # reinterpreted with the dtype of the argument it was created for.
            return tuple(
                bo.read(s, 0).view(a.dtype)
                for bo, s, a in zip(bos, sizes_in_bytes, args)
            )

        return invoker

    def compile_and_load(self, module):
        """Compile and load a module in one step."""
        c = self.compile(module)
        return self.load(c)

    def unload(self):
        """Unload any loaded module and shutdown the air runtime.

        Drops this object's references to the XRT kernel, context, xclbin,
        device, instruction buffer and instruction array.
        """
        self.kernel = None
        self.context = None
        self.xclbin = None
        self.device = None
        self.bo_instr = None
        self.instr_v = None
0 commit comments