@@ -398,6 +398,28 @@ air::getLockValuePair(const AIE::AIETargetModel &targetModel,
398
398
unique_write_buffers.size ());
399
399
}
400
400
401
+ // Helper function that tries to retrieve the underlying AIE::BufferOp by
402
+ // unwrapping common memref wrappers (cast or subview)
403
+ AIE::BufferOp getUnderlyingBufferOp (Value buffer) {
404
+ // Case 1: Directly defined by an AIE::BufferOp
405
+ if (auto bufferOp = buffer.getDefiningOp <AIE::BufferOp>())
406
+ return bufferOp;
407
+
408
+ // Case 2: Defined by a cast (e.g., memref.cast)
409
+ if (auto castOp = buffer.getDefiningOp <CastOpInterface>())
410
+ if (auto innerBuffer = castOp->getOperand (0 ).getDefiningOp <AIE::BufferOp>())
411
+ return innerBuffer;
412
+
413
+ // Case 3: Defined by a view-like op (e.g., memref.subview)
414
+ if (auto viewLikeOp = buffer.getDefiningOp <ViewLikeOpInterface>())
415
+ if (auto innerBuffer =
416
+ viewLikeOp->getOperand (0 ).getDefiningOp <AIE::BufferOp>())
417
+ return innerBuffer;
418
+
419
+ // No underlying BufferOp found
420
+ return nullptr ;
421
+ }
422
+
401
423
// allocation_info_t impl.
402
424
403
425
bool xilinx::air::allocation_info_t::valid () { return dma_tile != nullptr ; }
@@ -745,12 +767,7 @@ air::TileDMAAllocator::getBuffer(uint64_t, int64_t col, int64_t row,
745
767
Value buffer = isTileInbound (memcpyOp, DMAMemorySpaceAsInt).value ()
746
768
? (memcpyOp.getDstMemref ())
747
769
: (memcpyOp.getSrcMemref ());
748
- AIE::BufferOp bufferOp = buffer.getDefiningOp <AIE::BufferOp>();
749
- // Memref cast
750
- memref::CastOp castOp = buffer.getDefiningOp <memref::CastOp>();
751
- if (!bufferOp && castOp)
752
- bufferOp = castOp.getOperand ().getDefiningOp <AIE::BufferOp>();
753
- return bufferOp;
770
+ return getUnderlyingBufferOp (buffer);
754
771
}
755
772
756
773
// ShimDMAAllocator impl.
@@ -1040,12 +1057,102 @@ air::MemTileDMAAllocator::getBuffer(uint64_t, int64_t col, int64_t row,
1040
1057
Value buffer = isTileInbound (memcpyOp, DMAMemorySpaceAsInt).value ()
1041
1058
? (memcpyOp.getDstMemref ())
1042
1059
: (memcpyOp.getSrcMemref ());
1043
- AIE::BufferOp bufferOp = buffer.getDefiningOp <AIE::BufferOp>();
1044
- // Memref cast
1045
- memref::CastOp castOp = buffer.getDefiningOp <memref::CastOp>();
1046
- if (!bufferOp && castOp)
1047
- bufferOp = castOp.getOperand ().getDefiningOp <AIE::BufferOp>();
1048
- return bufferOp;
1060
+ return getUnderlyingBufferOp (buffer);
1061
+ }
1062
+
1063
+ // CascadeAllocator impl.
1064
+
1065
+ // Attempts to allocate (or reuse) a cascade flow for the given memcpyOp.
1066
+ FailureOr<air::allocation_info_t >
1067
+ air::CascadeAllocator::coreCascadeAlloc (air::MemcpyInterface &memcpyOp) {
1068
+ // Determine if the operation is a cascade put (outbound)
1069
+ auto isCascadePut = isTileOutbound (memcpyOp, DMAMemorySpaceAsInt);
1070
+ if (failed (isCascadePut))
1071
+ return failure ();
1072
+
1073
+ // Select allocation list based on direction
1074
+ auto allocs =
1075
+ isCascadePut.value () ? &cascade_put_allocs : &cascade_get_allocs;
1076
+
1077
+ // Retrieve the buffer and the tile where this memcpyOp operates
1078
+ const int dummy{0 };
1079
+ auto buffer = getBuffer (dummy, -1 , -1 , memcpyOp);
1080
+ if (failed (buffer)) {
1081
+ return memcpyOp->emitOpError (" failed to get buffer." );
1082
+ }
1083
+ auto tile = buffer.value ().getTileOp ();
1084
+ if (!tile) {
1085
+ return buffer.value ()->emitOpError (" failed to get AIE tile." );
1086
+ }
1087
+
1088
+ // Search for an existing allocation for this tile and memcpyOp
1089
+ for (auto &t : *allocs) {
1090
+ if (t.foundAlloc (tile.getCol (), tile.getRow (), memcpyOp))
1091
+ return t;
1092
+ }
1093
+
1094
+ // No existing allocation found, create a new one
1095
+ return air::CascadeAllocator::allocNewCascade (memcpyOp, tile);
1096
+ }
1097
+
1098
+ // Creates a new cascade allocation entry when no matching allocation exists.
1099
+ FailureOr<air::allocation_info_t >
1100
+ air::CascadeAllocator::allocNewCascade (air::MemcpyInterface &memcpyOp,
1101
+ AIE::TileOp tile) {
1102
+ if (!tile) {
1103
+ return memcpyOp.emitOpError (" failed to get the AIE tile. This indicates a "
1104
+ " potential error in the compilation flow." );
1105
+ }
1106
+
1107
+ // Determine if this is a cascade put or get
1108
+ auto isCascadePut = isTileOutbound (memcpyOp, DMAMemorySpaceAsInt);
1109
+ if (failed (isCascadePut))
1110
+ return failure ();
1111
+ auto allocs =
1112
+ isCascadePut.value () ? &cascade_put_allocs : &cascade_get_allocs;
1113
+
1114
+ // Check if allocation already exists for this tile
1115
+ for (auto &t : *allocs) {
1116
+ if (t.foundAlloc (tile.getCol (), tile.getRow ())) {
1117
+ t.memcpyOps .push_back (memcpyOp.getOperation ());
1118
+ return t;
1119
+ }
1120
+ // Also check for an allocation tied to the channel declaration
1121
+ if (t.foundAlloc (
1122
+ tile.getCol (), tile.getRow (),
1123
+ getChannelDeclarationThroughSymbol (
1124
+ dyn_cast<air::ChannelInterface>(memcpyOp.getOperation ())))) {
1125
+ t.memcpyOps .push_back (memcpyOp.getOperation ());
1126
+ return t;
1127
+ }
1128
+ }
1129
+
1130
+ // Create a new allocation_info_t entry for this tile
1131
+ air::allocation_info_t output = {tile,
1132
+ /* col*/ -1 ,
1133
+ /* row*/ -1 ,
1134
+ /* aie_chan*/ AIE::DMAChannel (),
1135
+ /* chan*/ -1 ,
1136
+ /* dma_id*/ std::vector<int >{},
1137
+ {memcpyOp.getOperation ()}};
1138
+ allocs->push_back (output);
1139
+ return output;
1140
+ }
1141
+
1142
+ // Retrieves the underlying AIE::BufferOp associated with the given memcpyOp.
1143
+ FailureOr<AIE::BufferOp>
1144
+ air::CascadeAllocator::getBuffer (uint64_t , int64_t col, int64_t row,
1145
+ air::MemcpyInterface &memcpyOp) {
1146
+ if (failed (isTileInbound (memcpyOp, DMAMemorySpaceAsInt)))
1147
+ return failure ();
1148
+
1149
+ // Select source or destination buffer depending on inbound/outbound
1150
+ Value buffer = isTileInbound (memcpyOp, DMAMemorySpaceAsInt).value ()
1151
+ ? (memcpyOp.getDstMemref ())
1152
+ : (memcpyOp.getSrcMemref ());
1153
+
1154
+ // Resolve the actual underlying buffer op
1155
+ return getUnderlyingBufferOp (buffer);
1049
1156
}
1050
1157
1051
1158
// MemcpyBundleAsFlow impl.
@@ -1061,6 +1168,7 @@ air::MemcpyBundleAsFlow::pushBackMemcpyOpToBundle(air::DmaMemcpyNdOp memcpyOp) {
1061
1168
MM2S_memspace_as_int =
1062
1169
llvm::cast<BaseMemRefType>(memcpyOp.getSrcMemref ().getType ())
1063
1170
.getMemorySpaceAsInt ();
1171
+ memcpyResourceType = " dma_stream" ;
1064
1172
return success ();
1065
1173
}
1066
1174
@@ -1095,6 +1203,7 @@ air::MemcpyBundleAsFlow::pushBackMemcpyOpToBundle(air::ChannelGetOp memcpyOp) {
1095
1203
S2MM_memspace_as_int =
1096
1204
llvm::cast<BaseMemRefType>(memcpyOp.getMemref ().getType ())
1097
1205
.getMemorySpaceAsInt ();
1206
+ memcpyResourceType = chan.getChannelType ().str ();
1098
1207
return success ();
1099
1208
}
1100
1209
@@ -1106,6 +1215,7 @@ air::MemcpyBundleAsFlow::pushBackMemcpyOpToBundle(air::ChannelPutOp memcpyOp) {
1106
1215
MM2S_memspace_as_int =
1107
1216
llvm::cast<BaseMemRefType>(memcpyOp.getMemref ().getType ())
1108
1217
.getMemorySpaceAsInt ();
1218
+ memcpyResourceType = chan.getChannelType ().str ();
1109
1219
return success ();
1110
1220
}
1111
1221
@@ -1128,6 +1238,7 @@ air::MemcpyBundleAsFlow::MemcpyBundleAsFlow(air::DmaMemcpyNdOp dmaMemcpyOp) {
1128
1238
std::vector<Operation *>());
1129
1239
S2MM = v1;
1130
1240
S2MM_alloc = std::vector<air::allocation_info_t >(numS2MMAllocs);
1241
+ memcpyResourceType = " dma_stream" ;
1131
1242
}
1132
1243
1133
1244
air::MemcpyBundleAsFlow::MemcpyBundleAsFlow (air::ChannelOp chan) {
@@ -1146,6 +1257,7 @@ air::MemcpyBundleAsFlow::MemcpyBundleAsFlow(air::ChannelOp chan) {
1146
1257
std::vector<Operation *>());
1147
1258
S2MM = v1;
1148
1259
S2MM_alloc = std::vector<air::allocation_info_t >(numS2MMAllocs);
1260
+ memcpyResourceType = chan.getChannelType ().str ();
1149
1261
}
1150
1262
1151
1263
} // namespace xilinx
@@ -1158,7 +1270,8 @@ LogicalResult air::simpleDMAChannelAllocation(
1158
1270
std::vector<air::MemcpyBundleAsFlow> &memcpy_flows,
1159
1271
air::ShimDMAAllocator &shim_dma_alloc,
1160
1272
air::MemTileDMAAllocator &memtile_dma_alloc,
1161
- TileDMAAllocator &tile_dma_alloc) {
1273
+ TileDMAAllocator &tile_dma_alloc,
1274
+ air::CascadeAllocator &core_cascade_alloc) {
1162
1275
for (auto &f : memcpy_flows) {
1163
1276
if (f.MM2S_memspace_as_int == (int )air::MemorySpace::L1) {
1164
1277
for (auto o : f.MM2S ) {
@@ -1172,10 +1285,18 @@ LogicalResult air::simpleDMAChannelAllocation(
1172
1285
int x = tile.getCol ();
1173
1286
int y = tile.getRow ();
1174
1287
1175
- auto alloc_res = tile_dma_alloc.simpleDmaChannelAlloc (
1176
- memcpyOpIf, x, y, f.MM2S_alloc .dma_channel .channel );
1177
- if (failed (alloc_res))
1178
- return failure ();
1288
+ FailureOr<air::allocation_info_t > alloc_res;
1289
+ if (f.memcpyResourceType == " dma_stream" ||
1290
+ f.memcpyResourceType == " dma_packet" ) {
1291
+ alloc_res = tile_dma_alloc.simpleDmaChannelAlloc (
1292
+ memcpyOpIf, x, y, f.MM2S_alloc .dma_channel .channel );
1293
+ if (failed (alloc_res))
1294
+ return failure ();
1295
+ } else if (f.memcpyResourceType == " cascade" ) {
1296
+ alloc_res = core_cascade_alloc.coreCascadeAlloc (memcpyOpIf);
1297
+ if (failed (alloc_res))
1298
+ return failure ();
1299
+ }
1179
1300
1180
1301
f.MM2S_alloc = alloc_res.value ();
1181
1302
if (!f.MM2S_alloc .valid ())
@@ -1195,10 +1316,19 @@ LogicalResult air::simpleDMAChannelAllocation(
1195
1316
int x = tile.getCol ();
1196
1317
int y = tile.getRow ();
1197
1318
1198
- auto alloc_res = tile_dma_alloc.simpleDmaChannelAlloc (
1199
- memcpyOpIf, x, y, f.S2MM_alloc [i].dma_channel .channel );
1200
- if (failed (alloc_res))
1201
- return failure ();
1319
+ FailureOr<air::allocation_info_t > alloc_res;
1320
+ if (f.memcpyResourceType == " dma_stream" ||
1321
+ f.memcpyResourceType == " dma_packet" ) {
1322
+ alloc_res = tile_dma_alloc.simpleDmaChannelAlloc (
1323
+ memcpyOpIf, x, y, f.S2MM_alloc [i].dma_channel .channel );
1324
+ if (failed (alloc_res))
1325
+ return failure ();
1326
+ } else if (f.memcpyResourceType == " cascade" ) {
1327
+ alloc_res = core_cascade_alloc.coreCascadeAlloc (memcpyOpIf);
1328
+ if (failed (alloc_res))
1329
+ return failure ();
1330
+ }
1331
+
1202
1332
f.S2MM_alloc [i] = alloc_res.value ();
1203
1333
if (!f.S2MM_alloc [i].valid ())
1204
1334
return failure ();
@@ -1210,6 +1340,12 @@ LogicalResult air::simpleDMAChannelAllocation(
1210
1340
if (f.MM2S_memspace_as_int == (int )air::MemorySpace::L2) {
1211
1341
for (auto o : f.MM2S ) {
1212
1342
auto memcpyOpIf = cast<air::MemcpyInterface>(o);
1343
+ // Report error if the data movement lowers to neither dma stream
1344
+ // (aie.flow) nor dma packet flow (aie.packet_flow).
1345
+ if (f.memcpyResourceType != " dma_stream" &&
1346
+ f.memcpyResourceType != " dma_packet" )
1347
+ return memcpyOpIf->emitOpError (" only supports dma_stream or "
1348
+ " dma_packet connections at L2 memory" );
1213
1349
auto alloc_res = memtile_dma_alloc.simpleDmaChannelAlloc (memcpyOpIf);
1214
1350
if (failed (alloc_res) || !alloc_res->valid ())
1215
1351
return failure ();
@@ -1220,6 +1356,13 @@ LogicalResult air::simpleDMAChannelAllocation(
1220
1356
for (size_t i = 0 ; i < f.S2MM .size (); i++) {
1221
1357
for (auto o : f.S2MM [i]) {
1222
1358
auto memcpyOpIf = cast<air::MemcpyInterface>(o);
1359
+ // Report error if the data movement lowers to neither dma stream
1360
+ // (aie.flow) nor dma packet flow (aie.packet_flow).
1361
+ if (f.memcpyResourceType != " dma_stream" &&
1362
+ f.memcpyResourceType != " dma_packet" )
1363
+ return memcpyOpIf->emitOpError (
1364
+ " only supports dma_stream or dma_packet connections at L2 "
1365
+ " memory" );
1223
1366
auto alloc_res = memtile_dma_alloc.simpleDmaChannelAlloc (memcpyOpIf);
1224
1367
if (failed (alloc_res) || !alloc_res->valid ())
1225
1368
return failure ();
@@ -1233,6 +1376,13 @@ LogicalResult air::simpleDMAChannelAllocation(
1233
1376
for (size_t i = 0 ; i < f.S2MM .size (); i++) {
1234
1377
for (auto o : f.MM2S ) {
1235
1378
auto memcpyOpIf = cast<air::MemcpyInterface>(o);
1379
+ // Report error if the data movement lowers to neither dma stream
1380
+ // (aie.flow) nor dma packet flow (aie.packet_flow).
1381
+ if (f.memcpyResourceType != " dma_stream" &&
1382
+ f.memcpyResourceType != " dma_packet" )
1383
+ return memcpyOpIf->emitOpError (
1384
+ " only supports dma_stream or dma_packet connections at L3 "
1385
+ " memory" );
1236
1386
auto alloc_res = shim_dma_alloc.allocNewDmaChannel (
1237
1387
memcpyOpIf, f.S2MM_alloc [i].getDmaTile ().getCol (),
1238
1388
f.S2MM_alloc [i].getDmaTile ().getRow (), f.S2MM [i]);
@@ -1251,6 +1401,12 @@ LogicalResult air::simpleDMAChannelAllocation(
1251
1401
}
1252
1402
for (auto o : f.S2MM .front ()) {
1253
1403
auto memcpyOpIf = cast<air::MemcpyInterface>(o);
1404
+ // Report error if the data movement lowers to neither dma stream
1405
+ // (aie.flow) nor dma packet flow (aie.packet_flow).
1406
+ if (f.memcpyResourceType != " dma_stream" &&
1407
+ f.memcpyResourceType != " dma_packet" )
1408
+ return memcpyOpIf->emitOpError (" only supports dma_stream or "
1409
+ " dma_packet connections at L3 memory" );
1254
1410
auto alloc_res = shim_dma_alloc.allocNewDmaChannel (
1255
1411
memcpyOpIf, f.MM2S_alloc .getDmaTile ().getCol (),
1256
1412
f.MM2S_alloc .getDmaTile ().getRow (), f.MM2S );
0 commit comments