[WIP] add expand_shape to encoding
Signed-off-by: Alan Li <[email protected]>
lialan committed Aug 7, 2024
1 parent 6b85ee7 commit 29a03ef
Showing 1 changed file with 65 additions and 22 deletions.
@@ -6,8 +6,11 @@

#include "iree/compiler/Codegen/Common/EncodingUtils.h"
#include "iree/compiler/Codegen/Common/GPU/Passes.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#define DEBUG_TYPE "iree-codegen-gpu-materialize-encoding"
@@ -36,8 +39,10 @@ getIntrinsicVectorSize(TypeRange elementTypes, int64_t roleIdx) {
Type rhs = elementTypes[1];
Type out = elementTypes[2];
if (lhs.isF32() && rhs.isF32() && out.isF32()) {
if (roleIdx == 0 || roleIdx == 1) return std::make_pair(1, 1);
if (roleIdx == 2) return std::make_pair(4, 1);
if (roleIdx == 0 || roleIdx == 1)
return std::make_pair(1, 1);
if (roleIdx == 2)
return std::make_pair(4, 1);
}
return std::nullopt;
}
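
For reference, a minimal usage sketch of the helper above (a sketch, not part of this patch: queryF32VectorSizes is a hypothetical function, and an MLIRContext plus the file's usual using-declarations are assumed):

static void queryF32VectorSizes(MLIRContext *ctx) {
  Type f32 = Float32Type::get(ctx);
  SmallVector<Type> elemTypes = {f32, f32, f32}; // lhs, rhs, out
  // A/B operands: each thread holds a 1x1 vector.
  auto abVec = getIntrinsicVectorSize(elemTypes, /*roleIdx=*/0); // -> {1, 1}
  // Accumulator: each thread holds a 4x1 vector.
  auto accVec = getIntrinsicVectorSize(elemTypes, /*roleIdx=*/2); // -> {4, 1}
  (void)abVec;
  (void)accVec;
}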
@@ -144,6 +149,7 @@ struct GPUSetEncodingOpLoweringConversion
auto elemTypes = llvm::map_to_vector(
encoding.getElementTypes().getValue(),
[](Attribute a) { return cast<TypeAttr>(a).getValue(); });
auto loc = encodingOp.getLoc();
std::optional<TileMxNxK> intrinsicShape = getIntrinsicSize(elemTypes);
std::optional<std::pair<int64_t, int64_t>> intrinsicVectorShape =
getIntrinsicVectorSize(elemTypes, roleIdx);
@@ -152,18 +158,18 @@ struct GPUSetEncodingOpLoweringConversion
}

SmallVector<int64_t> targetShape; // for unrolling
switch(roleIdx) {
case 0:
targetShape = {intrinsicShape->M, intrinsicShape->K};
break;
case 1:
targetShape = {intrinsicShape->N, intrinsicShape->K};
break;
case 2:
targetShape = {intrinsicShape->M, intrinsicShape->N};
break;
default:
return failure();
switch (roleIdx) {
case 0: // A
targetShape = {intrinsicShape->M, intrinsicShape->K};
break;
case 1: // B
targetShape = {intrinsicShape->N, intrinsicShape->K};
break;
case 2: // C
targetShape = {intrinsicShape->M, intrinsicShape->N};
break;
default:
return failure();
}
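
Worked instance: the f32 path above, combined with the 16x4 LHS / 16x16 ACC examples later in this patch, implies an intrinsic shape of M=16, N=16, K=4 (getIntrinsicSize itself is outside this hunk, so these values are an assumption):

// roleIdx 0 (A): targetShape = {M, K} = {16, 4}
// roleIdx 1 (B): targetShape = {N, K} = {16, 4}
// roleIdx 2 (C): targetShape = {M, N} = {16, 16}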

assert(innerTiles.size() == targetShape.size());
@@ -175,15 +181,52 @@ struct GPUSetEncodingOpLoweringConversion
assert(packedShape == targetShape);
}

// TODO(lialan): create expand_shape. Take LHS as an example:
// 16x4xf32 -> 16x1x4x1. Because the vector size used in the intrinsic is
// 1x1.
// For C-Matrix (i.e., ACC), it is 16x16xf32 -> 4x4x16x1xf32. Because the
// vector size is 4x1.
// Check that the matrix dimensions are divisible by the tile shape; if
// not, bail out.
auto sourceShape = encodingOp.getSourceType().getShape();
assert(sourceShape.size() == 2);
if (sourceShape[0] % innerTiles[0] != 0 ||
sourceShape[1] % innerTiles[1] != 0) {
return failure();
}

// TODO(lialan): create linalg.transpose op.
// LHS: 16x1x4x1 -> 4x16x1x1 (perm = [2, 0, 3, 1])
// ACC: 4x4x16x1 -> 4x16x4x1 (perm = [0, 2, 1, 3])
// Create tensor.expand_shape to expose the intrinsic vector shape, e.g.
// for the LHS: 16x4xf32 -> 16x1x4x1xf32.
auto [iT1, iT2] = *intrinsicVectorShape;
int64_t oT1 = sourceShape[0] / iT1;
int64_t oT2 = sourceShape[1] / iT2;
SmallVector<int64_t> expandShapeShape = {oT1, iT1, oT2, iT2};
RankedTensorType expandShapeType =
RankedTensorType::Builder(encodingOp.getSourceType())
.setShape(expandShapeShape);
// Group each source dimension with its intrinsic-vector factor:
// [d0, d1] -> [[d0/iT1, iT1], [d1/iT2, iT2]].
SmallVector<ReassociationIndices> reassociation = {{0, 1}, {2, 3}};
Value expandShapeOp = rewriter.create<tensor::ExpandShapeOp>(
loc, expandShapeType, packOp->getResult(), reassociation);
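
Plugging the shapes from the examples above into this arithmetic:

// LHS: sourceShape = {16, 4},  intrinsic vector {1, 1}
//   -> expandShapeShape = {16/1, 1, 4/1, 1} = {16, 1, 4, 1}
// ACC: sourceShape = {16, 16}, intrinsic vector {4, 1}
//   -> expandShapeShape = {16/4, 4, 16/1, 1} = {4, 4, 16, 1}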

// Create linalg.transpose
// LHS/RHS:
// OuterTileX x InnerTileX x OuterTileY x InnerTileY
// -> OuterTileY x OuterTileX x InnerTileY x InnerTileX
// (perm = [2, 0, 3, 1])
//
// ACC:
// OuterTileX x InnerTileX x OuterTileY x InnerTileY
// -> OuterTileX x OuterTileY x InnerTileX x InnerTileY
// (perm = [0, 2, 1, 3])
// Use a SmallVector: assigning an initializer list to an ArrayRef would
// leave it pointing at a destroyed temporary.
SmallVector<int64_t> permutation;
switch (roleIdx) {
case 0: // A
case 1: // B
permutation = {2, 0, 3, 1};
break;
case 2: // C
permutation = {0, 2, 1, 3};
break;
}
// The init must use the permuted shape and the source element type.
SmallVector<int64_t> transposedShape;
for (int64_t dim : permutation)
transposedShape.push_back(expandShapeShape[dim]);
auto emptyTensor = rewriter.create<tensor::EmptyOp>(
loc, transposedShape, encodingOp.getSourceType().getElementType());
[[maybe_unused]] auto transposeOp = rewriter.create<linalg::TransposeOp>(
loc, expandShapeOp, emptyTensor, permutation);
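
A self-contained sketch of the permutation arithmetic (applyPerm is a hypothetical stand-in, not part of this patch; plain C++ so it can be checked in isolation):

#include <array>
#include <cstdint>

using Shape4 = std::array<int64_t, 4>;

// Apply a 4-element permutation: out[i] = shape[perm[i]].
static Shape4 applyPerm(Shape4 shape, std::array<int64_t, 4> perm) {
  Shape4 out{};
  for (int i = 0; i < 4; ++i)
    out[i] = shape[perm[i]];
  return out;
}

// LHS: applyPerm({16, 1, 4, 1}, {2, 0, 3, 1}) == {4, 16, 1, 1}
// ACC: applyPerm({4, 4, 16, 1}, {0, 2, 1, 3}) == {4, 16, 4, 1}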

// TODO(hanchung): We want to make the shape consistent, so we need to
// collapse and expand the shape. This is the shape we materialize for Flow
