-
Notifications
You must be signed in to change notification settings - Fork 107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Change offset
, len
syntax in dma_bd
#1137
base: main
Are you sure you want to change the base?
Changes from all commits
309fb7a
7d82c32
a4cc95d
16b00e2
5c753d0
53d9b87
bcf813f
c92f0ce
af0502f
4f1f103
6497353
f39365b
0d436b8
e192186
a86f4c4
eee297e
2b4c4cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -263,7 +263,7 @@ def AIE_ShimDMAOp: AIE_Op<"shim_dma", [ | |
aie.dma_start(MM2S, 0, ^bd0, ^end) | ||
^bd0: | ||
aie.use_lock(%lock1, Acquire, 1) | ||
aie.dma_bd(%buf : memref<512 x i16>, 0, 512) | ||
aie.dma_bd(%buf : memref<512 x i16>) { offset = 0 : i32, len = 512 : i32 } | ||
aie.use_lock(%lock1, Release, 0) | ||
aie.next_bd ^bd0 | ||
^end: | ||
|
@@ -745,46 +745,67 @@ def AIE_DMABDPACKETOp: AIE_Op<"dma_bd_packet", []> { | |
}]; | ||
} | ||
|
||
def AIE_DMABDOp: AIE_Op<"dma_bd", []> { | ||
let summary = "Declare a dma block descriptor op"; | ||
def AIE_DMABDOp: AIE_Op<"dma_bd", [ | ||
ParentOneOf<["MemOp", "MemTileDMAOp", "ShimDMAOp", "DMAOp"]>, | ||
]> { | ||
let summary = "Declare a dma buffer descriptor op"; | ||
let description = [{ | ||
This operation describes a block descriptor for DMA operations. In particular, it specifies | ||
what buffer addresss to use, the transfer length, and the buffer type (A or B). | ||
This operation describes a buffer descriptor for DMA operations. In particular, it specifies | ||
what buffer to use, and optionally: | ||
|
||
This operation must be used in an MLIR block that lives inside a MemOp's region. | ||
The block descriptor specifies what lock to use and the buffer configuration. | ||
1. the offset into the buffer; | ||
2. the transfer length; | ||
3. the sizes and strides for n-d tensor addressing (described below); | ||
4. the "bd_id" with which to associate the buffer descriptor (most often left empty). | ||
|
||
`offset`, `len`, `size`s and `stride`s are all denominated in element width; e.g., transferring the whole of | ||
`memref<512xi32>` means `len == 512`, and also while transferring the whole of `memref<512xi16>`, `len == 512`. | ||
|
||
The only caveat to this "everything-is-in-terms-of-element-width" rule is regarding the inner-most dimension's stride | ||
(see [Important gotcha regarding strides](#important-gotcha-regarding-strides) below). | ||
|
||
`dma_bd` ops must appear in their own BBs (basic blocks) and such BBs can (optionally) include `use_lock` | ||
operations (specifying an "acquire" and a "release" lock; see the `use_lock` operation) and subsequent BDs in | ||
a "chain" of BDs (using `next_bd` as a "jump" to the next BB which contains a `dma_bd`). | ||
|
||
Example: | ||
``` | ||
// this defines a BD that uses lock %lck0 and buffer %buf0 | ||
^bd5: | ||
aie.use_lock(%lck, "Acquire", 0) | ||
aie.dma_bd(<$buf0 : memref<512xi32>, 0, 512>, 1) | ||
// transfer the first 32 elements of the memref | ||
aie.dma_bd(<$buf0 : memref<128xi32>) { offset = 0 : i32, len = 32 : i32 } | ||
aie.use_lock(%lck, "Release", 1) | ||
br ^bd6 // point to the next Block, which is also a different Block Descriptor | ||
aie.next_bd ^bd6 // point to the next bb, which describes the next buffer descriptor | ||
^bd6: | ||
aie.use_lock(%lck, "Acquire", 1) | ||
// transfer the last 32 elements of the memref | ||
aie.dma_bd(<$buf1 : memref<128xi32>) { offset = 96 : i32, len = 32 : i32 } | ||
aie.use_lock(%lck, "Release", 0) | ||
aie.next_bd ^end | ||
|
||
... | ||
|
||
// this defines a BD that does not use any lock | ||
^bd8: | ||
aie.dma_bd(<$buf1 : memref<64xi32>, 0, 64>, 0) | ||
aie.dma_bd(<$buf2 : memref<64xi32>) { offset = 0 : i32, len = 64 : i32 } | ||
``` | ||
A DMA channel in a Memory Module can process one block descriptor after another by chaining them. | ||
There are 16 block descriptors per Memory Module. They are shared by four DMA channels. | ||
|
||
## Background/context: | ||
|
||
A DMA channel in a Memory Module can process one buffer descriptor after another by chaining them. | ||
There are 16 buffer descriptors per Core memory module and 48 buffer descriptors per Memtile memory module. | ||
They are shared by four DMA channels (or 12). | ||
|
||
## DMA Data Layout Transformations on AIE-ML Devices | ||
|
||
AIE-ML devices can apply data layout transformations at the buffer | ||
descriptor level. These transformation are described by strides and sizes in up to three dimensions (four | ||
dimensions on memtiles). Strides and sizes can be supplied to the `dma_bd` | ||
through an optional argument, an array of tuples `<size, stride>`. | ||
|
||
The first element of this array gives the _highest-dimension_ stride and | ||
size, the last element of the array gives the lowest-dimension. | ||
|
||
Strides are always expressed in units of `i32`s; this is an architectural | ||
requirement, as data is moved by the DMA at this fundamental size. | ||
through an optional argument, an array of "tuple-like" attributes `bd_dim_layout<size, stride>`. | ||
|
||
The first element of this array gives the outer-most dimension's stride and | ||
size, the last element of the array gives the inner-most dimension's stride and size. | ||
We can model the access pattern strides and sizes generate by a series of | ||
nested loops. In general, a set of strides and sizes like this... | ||
|
||
|
@@ -820,13 +841,20 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> { | |
for(int k = 0; k < 8 /*size_0*/; k++) | ||
// access/store element at/to index (i * 16 /*stride_2*/ + j * 1 /*stride_1*/ + k * 2 /*stride_0*/) | ||
``` | ||
|
||
## Important gotcha regarding strides | ||
|
||
All strides are expressed in multiples of the element width (just like `len` and `offset`) | ||
**with the caveat that the inner-most dimension's stride must be 1**. | ||
}]; | ||
|
||
let arguments = ( | ||
ins AnyMemRef:$buffer, | ||
OptionalAttr<AIEI32Attr>:$offset, | ||
// in multiples of element width (not bytes) | ||
DefaultValuedOptionalAttr<AIEI32Attr, "0">:$offset, | ||
// in multiples of element width (not bytes) | ||
OptionalAttr<AIEI32Attr>:$len, | ||
OptionalAttr<BDDimLayoutArrayAttr>:$dimensions, | ||
OptionalAttr<BDDimLayoutArrayAttr>:$dims, | ||
OptionalAttr<AIEI32Attr>:$bd_id, | ||
// should never be assigned by user... | ||
OptionalAttr<AIEI32Attr>:$next_bd_id | ||
|
@@ -835,13 +863,21 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> { | |
let hasVerifier = 1; | ||
|
||
let assemblyFormat = [{ | ||
`(` $buffer `:` type($buffer) (`,` $offset^)? (`,` $len^)? (`,` $dimensions^)? `)` attr-dict | ||
`(` $buffer `:` type($buffer) (`,` `dims` `=` $dims^)? `)` attr-dict | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we still need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All attributes are always in the attribute dictionary (on the C++ side) but the reason I put dims here is because if you put them into |
||
}]; | ||
|
||
let extraClassDeclaration = [{ | ||
BufferOp getBufferOp(); | ||
int getOffsetValue() { return getOffset().value_or(0); } | ||
int getLenValue() { return getLen().value_or(getBuffer().getType().getNumElements()); } | ||
int32_t getBufferElementTypeWidthInBytes() { | ||
return getBuffer().getType().getElementTypeBitWidth() / 8; | ||
} | ||
int32_t getLenInBytes() { | ||
if (std::optional<int32_t> len = getLen(); len.has_value()) | ||
return len.value() * getBufferElementTypeWidthInBytes(); | ||
else | ||
return getBuffer().getType().getNumElements() * getBufferElementTypeWidthInBytes(); | ||
} | ||
int32_t getOffsetInBytes() { return getOffset() * getBufferElementTypeWidthInBytes(); } | ||
}]; | ||
|
||
let hasVerifier = 1; | ||
|
@@ -855,7 +891,7 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> { | |
$_state.addOperands(buffer); | ||
$_state.addAttribute("offset", $_builder.getI32IntegerAttr(offset)); | ||
$_state.addAttribute("len", $_builder.getI32IntegerAttr(len)); | ||
$_state.addAttribute("dimensions", dims); | ||
$_state.addAttribute("dims", dims); | ||
}]> | ||
]; | ||
} | ||
|
@@ -878,7 +914,7 @@ def AIE_DMAStartOp: AIE_Op<"dma_start", [ | |
aie.dma_start("MM2S", 0, ^bd0, ^end) | ||
^bd0: | ||
aie.use_lock(%lock0, "Acquire", 0) | ||
aie.dma_bd(%buffer : memref<16 x f32>, 0, 16) | ||
aie.dma_bd(%buffer : memref<16 x f32>) { offset = 0 : i32, len = 16 : i32 } | ||
aie.use_lock(%lock0, "Release", 1) | ||
br ^bd0 | ||
^end: | ||
|
@@ -962,7 +998,7 @@ def AIE_MemOp: AIE_Op<"mem", [ | |
%srcDma = aie.dma_start("S2MM", 0, ^bd0, ^end) | ||
^bd0: | ||
aie.use_lock(%lock, "Acquire", 0) | ||
aie.dma_bd(%buf : memref<64xi16>, 0, 64) | ||
aie.dma_bd(%buf : memref<64xi16>) { offset = 0 : i32, len = 64 : i32 } | ||
aie.use_lock(%lock, "Release", 1) | ||
aie.next_bd ^bd0 | ||
^end: | ||
|
@@ -1056,7 +1092,7 @@ def AIE_NextBDOp: AIE_Op<"next_bd", [ | |
%srcDma = aie.dma_start("S2MM", 0, ^bd0, ^end) | ||
^bd0: | ||
aie.use_lock(%lock, "Acquire", 0) | ||
aie.dma_bd(%buf : memref<64xi16>, 0, 64) | ||
aie.dma_bd(%buf : memref<64xi16>) { offset = 0 : i32, len = 64 : i32 } | ||
aie.use_lock(%lock, "Release", 1) | ||
aie.next_bd ^bd0 | ||
^end: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think GitHub does not let me add comments to lines you didn't change, but line 833 needs to be changed to use the new syntax for dims:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This part of the PR, which is only about moving
offset
andlen
out into theattr-dict
, hasn't been decided on yet (and I haven't rebased either after the first part landed). But I'll try to remember that if it gets green-lit.