Commit 995003d

Rename TensorTile to TensorAccessPattern (#1941)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Parent: 7419211 · Commit: 995003d


55 files changed: +1243 -1145 lines (large commit; only a subset of the changed files is shown below).

Diff for: programming_examples/basic/dma_transpose/aie2.py (+3 -3)

@@ -13,12 +13,12 @@
 from aie.dialects.aiex import *
 from aie.extras.context import mlir_mod_ctx
 from aie.helpers.dialects.ext.scf import _for as range_
-from aie.helpers.tensortiler import TensorTile
+from aie.helpers.taplib import TensorAccessPattern


 def my_passthrough(M, K, N, generate_access_map=False):
     tensor_ty = np.ndarray[(M, K), np.dtype[np.int32]]
-    data_transform = TensorTile(
+    data_transform = TensorAccessPattern(
         (M, K), offset=0, sizes=[1, 1, K, M], strides=[1, 1, 1, K]
     )
     if generate_access_map:
@@ -57,7 +57,7 @@ def sequence(A, B, C):
                 metadata=of_in,
                 bd_id=1,
                 mem=A,
-                tensor_tile=data_transform,
+                tap=data_transform,
                 issue_token=True,
             )
             npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N])
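
For reference, a minimal standalone sketch of the renamed class as it is used above. The module path, constructor keywords, and access-pattern values are taken from the diff; the concrete dimensions are illustrative placeholders, not part of the commit.

from aie.helpers.taplib import TensorAccessPattern

M, K = 64, 32  # placeholder dimensions, chosen only for illustration
# Transposed read of an (M, K) tensor: the innermost dimension walks M
# elements with stride K (down a column), and the next dimension steps
# across the K columns with stride 1.
data_transform = TensorAccessPattern(
    (M, K), offset=0, sizes=[1, 1, K, M], strides=[1, 1, 1, K]
)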

Diff for: programming_examples/basic/dma_transpose/aie2_alt.py (+3 -3)

@@ -13,12 +13,12 @@
 from aie.dialects.aiex import *
 from aie.extras.context import mlir_mod_ctx
 from aie.helpers.dialects.ext.scf import _for as range_
-from aie.helpers.tensortiler import TensorTile
+from aie.helpers.taplib import TensorAccessPattern


 def my_passthrough(M, K, N, generate_access_map=False):
     tensor_ty = np.ndarray[(M, K), np.dtype[np.int32]]
-    data_transform = TensorTile(
+    data_transform = TensorAccessPattern(
         (M, K), offset=0, sizes=[1, 1, K, M], strides=[1, 1, 1, K]
     )
     if generate_access_map:
@@ -54,7 +54,7 @@ def sequence(A, B, C):
             # The strides below are configured to read across all rows in the same column
             # Stride of K in dim/wrap 2 skips an entire row to read a full column
             in_task = shim_dma_single_bd_task(
-                of_in, A, tensor_tile=data_transform, issue_token=True
+                of_in, A, tap=data_transform, issue_token=True
             )
             out_task = shim_dma_single_bd_task(
                 of_out, C, sizes=[1, 1, 1, N], issue_token=True
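
The same keyword rename applies to the task-based API: shim_dma_single_bd_task now takes the access pattern as tap= instead of tensor_tile=. A before/after fragment, assuming of_in, A, and data_transform are defined as in the file above:

# Pre-commit spelling:
# in_task = shim_dma_single_bd_task(of_in, A, tensor_tile=data_transform, issue_token=True)

# Post-commit spelling (only the keyword changes, the value is unchanged):
in_task = shim_dma_single_bd_task(of_in, A, tap=data_transform, issue_token=True)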

Diff for: programming_examples/basic/matrix_multiplication/whole_array/aie2.py (+24 -24)

@@ -14,7 +14,7 @@
 from aie.dialects.aie import *
 from aie.dialects.aiex import *
 from aie.helpers.dialects.ext.scf import _for as range_
-from aie.helpers.tensortiler import TensorTile, TensorTileSequence
+from aie.helpers.taplib import TensorAccessPattern, TensorAccessSequence

 dtype_map = {
     "bf16": bfloat16,
@@ -49,14 +49,14 @@ def main():
     )
     argparser.add_argument("--trace_size", type=int, default=0)
     argparser.add_argument(
-        "--generate-tiles",
+        "--generate-taps",
         action="store_true",
-        help="Generate TensorTiles, a Python object to represent each data transfer"
+        help="Generate TensorAccessPatterns, a Python object to represent each data transfer"
         "of the input/output matrices. These objects can be used for visualization.",
     )
     args = argparser.parse_args()
     with mlir_mod_ctx() as ctx:
-        maybe_tiles = my_matmul(
+        maybe_taps = my_matmul(
             args.M,
             args.K,
             args.N,
@@ -68,13 +68,13 @@ def main():
             args.dtype_out,
             args.b_col_maj,
             args.trace_size,
-            args.generate_tiles,
+            args.generate_taps,
         )
         # print(ctx.module.operation.verify())
         print(ctx.module)

-    if args.generate_tiles:
-        return maybe_tiles
+    if args.generate_taps:
+        return maybe_taps


 def ceildiv(a, b):
@@ -93,7 +93,7 @@ def my_matmul(
     dtype_out_str,
     b_col_maj,
     trace_size,
-    generate_tiles=False,
+    generate_taps=False,
 ):
     n_aie_rows = 4
     n_aie_cores = n_aie_rows * n_aie_cols
@@ -169,11 +169,11 @@ def my_matmul(
     elif n_aie_cols == 4:
         dev = AIEDevice.npu1_4col

-    # These will hold TensorTile objects that represent the runtime
-    # npu_dma_memcpy_nd operations of this design. They are only used if generate_tiles is true
-    A_tensor_tiles = []
-    B_tensor_tiles = []
-    C_tensor_tiles = []
+    # These will hold TensorAccessPattern objects that represent the runtime
+    # npu_dma_memcpy_nd operations of this design. They are only used if generate_taps is true
+    A_taps = []
+    B_taps = []
+    C_taps = []

     @device(dev)
     def device_body():
@@ -415,8 +415,8 @@ def sequence(A, B, C):
                 # Use the calculated sizes/strides/offsets to record the data movement
                 # caused by the above call to npu_dma_memcpy_nd.
                 # This line does not change MLIR output at all.
-                C_tensor_tiles.append(
-                    TensorTile(
+                C_taps.append(
+                    TensorAccessPattern(
                         (M, N),
                         offset=C_offset,
                         sizes=C_sizes,
@@ -469,8 +469,8 @@ def sequence(A, B, C):
                 # Use the calculated sizes/strides/offsets to record the data movement
                 # caused by the above call to npu_dma_memcpy_nd.
                 # This line does not change MLIR output at all.
-                A_tensor_tiles.append(
-                    TensorTile(
+                A_taps.append(
+                    TensorAccessPattern(
                         (M, K),
                         offset=A_offset,
                         sizes=A_sizes,
@@ -515,8 +515,8 @@ def sequence(A, B, C):
                 # Use the calculated sizes/strides/offsets to record the data movement
                 # caused by the above call to npu_dma_memcpy_nd.
                 # This line does not change MLIR output at all.
-                B_tensor_tiles.append(
-                    TensorTile(
+                B_taps.append(
+                    TensorAccessPattern(
                         (K, N),
                         offset=B_col_offset,
                         sizes=B_sizes,
@@ -527,13 +527,13 @@ def sequence(A, B, C):
             dma_wait(*C_l2l3_fifos)
             dma_wait(*C_l2l3_fifos)

-    if generate_tiles:
-        # If generate tiles is true, return a representation of tensor tiles
+    if generate_taps:
+        # If generate_taps is true, return a representation of tensor tiles
         # representing all the npu_dma_memcpy_nd runtime sequence operations per input/ouput tensor.
         return (
-            TensorTileSequence.from_tiles(A_tensor_tiles),
-            TensorTileSequence.from_tiles(B_tensor_tiles),
-            TensorTileSequence.from_tiles(C_tensor_tiles),
+            TensorAccessSequence.from_taps(A_taps),
+            TensorAccessSequence.from_taps(B_taps),
+            TensorAccessSequence.from_taps(C_taps),
         )
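
The recording idiom introduced above (collect one TensorAccessPattern per runtime DMA transfer, then bundle them per tensor with TensorAccessSequence.from_taps) can be exercised on its own. A minimal sketch using only the constructor and from_taps call shown in the diff; the shape, block count, offsets, and strides are illustrative placeholders, not values from the design:

from aie.helpers.taplib import TensorAccessPattern, TensorAccessSequence

M, N = 256, 256          # placeholder output-matrix shape
n_row_blocks = 4         # placeholder: one transfer per block of rows

C_taps = []
for rb in range(n_row_blocks):
    C_taps.append(
        TensorAccessPattern(
            (M, N),
            offset=rb * (M // n_row_blocks) * N,   # start of this row block
            sizes=[1, 1, M // n_row_blocks, N],    # one (M/4) x N slab
            strides=[1, 1, N, 1],                  # row-major walk inside the slab
        )
    )

# Bundle the per-transfer patterns, mirroring the generate_taps return value above.
C_sequence = TensorAccessSequence.from_taps(C_taps)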

Diff for: programming_examples/basic/matrix_multiplication/whole_array/aie2_tiler.py (+24 -29)

@@ -14,7 +14,7 @@
 from aie.dialects.aie import *
 from aie.dialects.aiex import *
 from aie.helpers.dialects.ext.scf import _for as range_
-from aie.helpers.tensortiler import TensorTiler2D, TensorTileSequence
+from aie.helpers.taplib import TensorTiler2D, TensorAccessSequence

 dtype_map = {
     "bf16": bfloat16,
@@ -49,14 +49,14 @@ def main():
     )
     argparser.add_argument("--trace_size", type=int, default=0)
     argparser.add_argument(
-        "--generate-tiles",
+        "--generate-taps",
        action="store_true",
-        help="Generate TensorTiles, a Python object to represent each data transfer"
+        help="Generate TensorAccessPatterns, a Python object to represent each data transfer"
         "of the input/output matrices. These objects can be used for visualization.",
     )
     args = argparser.parse_args()
     with mlir_mod_ctx() as ctx:
-        maybe_tiles = my_matmul(
+        maybe_taps = my_matmul(
             args.M,
             args.K,
             args.N,
@@ -68,13 +68,13 @@ def main():
             args.dtype_out,
             args.b_col_maj,
             args.trace_size,
-            args.generate_tiles,
+            args.generate_taps,
         )
         # print(ctx.module.operation.verify())
         print(ctx.module)

-    if args.generate_tiles:
-        return maybe_tiles
+    if args.generate_taps:
+        return maybe_taps


 def ceildiv(a, b):
@@ -93,7 +93,7 @@ def my_matmul(
     dtype_out_str,
     b_col_maj,
     trace_size,
-    generate_tiles=False,
+    generate_taps=False,
 ):
     n_aie_rows = 4
     n_aie_cores = n_aie_rows * n_aie_cols
@@ -169,11 +169,11 @@ def my_matmul(
     elif n_aie_cols == 4:
         dev = AIEDevice.npu1_4col

-    # These will hold TensorTile objects that represent the runtime
-    # npu_dma_memcpy_nd operations of this design. They are only used if generate_tiles is true
-    A_tensor_tiles = []
-    B_tensor_tiles = []
-    C_tensor_tiles = []
+    # These will hold TensorAccessPattern objects that represent the runtime
+    # npu_dma_memcpy_nd operations of this design. They are only used if generate_taps is true
+    A_taps = []
+    B_taps = []
+    C_taps = []

     @device(dev)
     def device_body():
@@ -433,7 +433,7 @@ def sequence(A, B, C):
                 for col in range(n_aie_cols):

                     # This line does not change MLIR output at all - it's just for recording data movement
-                    C_tensor_tiles.append(C_tiles[c_index])
+                    C_taps.append(C_tiles[c_index])

                     # C Output Transfer:
                     # The smallest transfer unit is a (m*n_aie_rows)-x-(n)-sized sub-tile of the matrix.
@@ -457,16 +457,11 @@ def sequence(A, B, C):
                     c_task = shim_dma_single_bd_task(
                         C_l2l3_fifos[col],
                         C,
-                        tensor_tile=C_tiles[c_index],
+                        tap=C_tiles[c_index],
                         issue_token=True,
                     )
                     dma_start_task(c_task)
                     out_tasks.append(c_task)
-
-                    # Use the calculated sizes/strides/offsets to record the data movement
-                    # caused by the above call to npu_dma_memcpy_nd.
-                    # This line does not change MLIR output at all.
-                    C_tensor_tiles.append(C_tiles[c_index])
                     c_index += 1

                     for tile_row in range(current_tb_n_rows):
@@ -495,14 +490,13 @@ def sequence(A, B, C):
                         a_task = shim_dma_single_bd_task(
                             A_l3l2_fifos[col],
                             A,
-                            tensor_tile=A_tiles[tile_offset],
+                            tap=A_tiles[tile_offset],
                         )
                         dma_start_task(a_task)
                         in_tasks.append(a_task)
                         # Use the calculated sizes/strides/offsets to record the data movement
                         # caused by the above call to npu_dma_memcpy_nd.
                         # This line does not change MLIR output at all.
-                        A_tensor_tiles.append(A_tiles[tile_offset])

                         # B input transfer:
                         # Transfer the first a (n)-wide block of columns of B,
@@ -525,13 +519,14 @@ def sequence(A, B, C):
                         b_task = shim_dma_single_bd_task(
                             B_l3l2_fifos[col],
                             B,
-                            tensor_tile=B_tiles[col],
+                            tap=B_tiles[col],
                         )
                         dma_start_task(b_task)
                         in_tasks.append(b_task)

                         # These lines do not change MLIR output at all - they are just for recording data movement
-                        B_tensor_tiles.append(B_tiles[col])
+                        A_taps.append(A_tiles[tile_offset])
+                        B_taps.append(B_tiles[col])
                     if tb > 0 or (tb == 0 and pingpong > 0):
                         dma_await_task(*out_tasks)
                         out_tasks = []
@@ -542,13 +537,13 @@ def sequence(A, B, C):
             if len(in_tasks) > 0:
                 dma_free_task(*in_tasks)

-    if generate_tiles:
-        # If generate tiles is true, return a representation of tensor tiles
+    if generate_taps:
+        # If generate taps is true, return a representation of tensor access patterns
         # representing all the npu_dma_memcpy_nd runtime sequence operations per input/ouput tensor.
         return (
-            TensorTileSequence.from_tiles(A_tensor_tiles),
-            TensorTileSequence.from_tiles(B_tensor_tiles),
-            TensorTileSequence.from_tiles(C_tensor_tiles),
+            TensorAccessSequence.from_taps(A_taps),
+            TensorAccessSequence.from_taps(B_taps),
+            TensorAccessSequence.from_taps(C_taps),
         )
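
Both matmul variants also rename the command-line flag from --generate-tiles to --generate-taps. A small self-contained sketch of how the renamed flag maps onto the generate_taps parameter; the argparse wiring mirrors main() above, and the final print is only for illustration:

import argparse

argparser = argparse.ArgumentParser()
argparser.add_argument(
    "--generate-taps",
    action="store_true",
    help="Generate TensorAccessPatterns, a Python object to represent each data "
    "transfer of the input/output matrices. These objects can be used for visualization.",
)
args = argparser.parse_args()

# argparse exposes "--generate-taps" as args.generate_taps, which is what the
# designs above forward to my_matmul(..., generate_taps=...).
print(args.generate_taps)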
