
Commit 6f40941

Implement DepthToSpace CRD mode recomposition

Committed Jun 18, 2025 · 1 parent b742d71

2 files changed · +227 −0 lines changed
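At a glance, the new pattern collapses the ONNX reshape → transpose → reshape idiom into a single DepthToSpace op in CRD mode. A minimal before/after sketch (illustrative only: blocksize B = 2, toy shapes, and placeholder names %x, %s0, %s1 standing for the input and the two shape constants):

// Before: reshape -> transpose -> reshape (B = 2, C = 8, C' = 2).
%0 = "onnx.Reshape"(%x, %s0) : (tensor<1x8x2x2xf32>, tensor<6xi64>) -> tensor<1x2x2x2x2x2xf32>
%1 = "onnx.Transpose"(%0) {perm = [0, 1, 4, 2, 5, 3]} : (tensor<1x2x2x2x2x2xf32>) -> tensor<1x2x2x2x2x2xf32>
%2 = "onnx.Reshape"(%1, %s1) : (tensor<1x2x2x2x2x2xf32>, tensor<4xi64>) -> tensor<1x2x4x4xf32>

// After recomposition: a single op with identical semantics.
%2 = "onnx.DepthToSpace"(%x) {blocksize = 2 : si64, mode = "CRD"} : (tensor<1x8x2x2xf32>) -> tensor<1x2x4x4xf32>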
 

src/Dialect/ONNX/Transforms/Recompose.cpp

Lines changed: 151 additions & 0 deletions
@@ -701,6 +701,151 @@ struct RecomposeGeluFromMulPattern : public OpRewritePattern<ONNXMulOp> {
   }
 };
 
+struct RecomposeDepthToSpaceCRD : public OpRewritePattern<ONNXReshapeOp> {
+  using OpRewritePattern<ONNXReshapeOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(
+      ONNXReshapeOp reshapeOp, PatternRewriter &rewriter) const final {
+    using namespace onnx_mlir;
+    Location loc = ONNXLoc<ONNXReshapeOp>(reshapeOp);
+
+    std::optional<DepthToSpaceRecompositionResult> result =
+        matchDepthToSpaceCRDPattern(reshapeOp);
+    if (!result) {
+      return failure();
+    }
+
+    MultiDialectBuilder<OnnxBuilder> create(rewriter, result->fusedLocation);
+    rewriter.replaceOp(
+        reshapeOp, create.onnx.createOpAndInferShapes<ONNXDepthToSpaceOp>(
+                       reshapeOp.getType(), result->input, result->blockSize,
+                       result->mode));
+    return success();
+  }
+
+  // Result of attempting to recompose DepthToSpace. Contains the information
+  // gathered during matching.
+  struct DepthToSpaceRecompositionResult {
+    Value input;
+    int64_t blockSize;
+    std::string mode;
+    Location fusedLocation;
+  };
+
+  static std::optional<DepthToSpaceRecompositionResult>
+  matchDepthToSpaceCRDPattern(ONNXReshapeOp reshapeOp) {
+    using namespace onnx_mlir;
+    // DepthToSpace mode CRD match:
+    // DepthToSpace(x) =
+    //   %r0 = reshape %x NxCxHxW -> NxC//(B*B)xBxBxHxW
+    //   %t  = transpose %r0 perm=[0, 1, 4, 2, 5, 3]
+    //   %r1 = reshape NxC//(B*B)xHxBxWxB -> NxC//(B*B)x(HxB)x(WxB)
+
+    ONNXReshapeOp r0;
+    ONNXTransposeOp t;
+    ONNXReshapeOp r1 = reshapeOp;
+
+    t = r1->getOperand(0).getDefiningOp<ONNXTransposeOp>();
+    if (!t) {
+      return reportFailureForCRDMode("missing transpose");
+    }
+    r0 = t->getOperand(0).getDefiningOp<ONNXReshapeOp>();
+    if (!r0) {
+      return reportFailureForCRDMode("missing first reshape");
+    }
+
+    auto hasShapedStaticType = [](Type ty) {
+      auto shapedType = dyn_cast<ShapedType>(ty);
+      return shapedType && shapedType.hasStaticShape();
+    };
+
+    const bool haveOperationsValidTy =
+        llvm::all_of(TypeRange{r0.getOperand(0).getType(), r0.getType(),
+                         t.getType(), r1.getType()},
+            hasShapedStaticType);
+    if (!haveOperationsValidTy) {
+      return reportFailureForCRDMode(
+          "pattern operations do not have statically shaped tensor types");
+    }
+
+    auto fstReshapeInTy = cast<ShapedType>(r0->getOperand(0).getType());
+    ArrayRef<int64_t> fstReshapeInShape = fstReshapeInTy.getShape();
+    const size_t fstReshapeInRank = fstReshapeInTy.getRank();
+    if (fstReshapeInRank != 4) {
+      return reportFailureForCRDMode("input rank is not 4");
+    }
+
+    auto fstReshapeOutTy = cast<ShapedType>(r0.getType());
+    ArrayRef<int64_t> fstReshapeOutShape = fstReshapeOutTy.getShape();
+    const size_t fstReshapeOutRank = fstReshapeOutTy.getRank();
+    if (fstReshapeOutRank != 6) {
+      return reportFailureForCRDMode("output rank of first reshape is not 6");
+    }
+
+    // Check for the concrete reshape pattern:
+    //   reshape %x NxCxHxW -> NxC//(B*B)xBxBxHxW
+    const int64_t blocksize = fstReshapeOutShape[2];
+    if (blocksize != fstReshapeOutShape[3]) {
+      return reportFailureForCRDMode("blocksizes in dims 2 and 3 do not match");
+    }
+
+    if (fstReshapeInShape[0] != fstReshapeOutShape[0] ||
+        fstReshapeInShape[1] != fstReshapeOutShape[1] * blocksize * blocksize ||
+        fstReshapeInShape[2] != fstReshapeOutShape[4] ||
+        fstReshapeInShape[3] != fstReshapeOutShape[5]) {
+      return reportFailureForCRDMode("unexpected first reshape result shape");
+    }
+
+    // Check for the concrete permutation pattern:
+    //   transpose %r0 perm=[0, 1, 4, 2, 5, 3]
+    std::optional<ArrayAttr> permOpt = t.getPerm();
+    if (!permOpt) {
+      return reportFailureForCRDMode("missing permutation on transpose");
+    }
+
+    // Get the transpose permutation.
+    SmallVector<int64_t, 6> perms;
+    ArrayAttrIntVals(*permOpt, perms);
+
+    // Check the transpose permutation.
+    constexpr std::array<int64_t, 6> expectedPerms = {0, 1, 4, 2, 5, 3};
+    if (perms != ArrayRef(expectedPerms)) {
+      return reportFailureForCRDMode("unexpected permutation");
+    }
+
+    // Check for the concrete reshape pattern:
+    //   reshape NxC//(B*B)xHxBxWxB -> NxC//(B*B)x(HxB)x(WxB)
+    auto sndReshapeInTy = cast<ShapedType>(t.getType());
+    ArrayRef<int64_t> sndReshapeInShape = sndReshapeInTy.getShape();
+
+    auto sndReshapeOutTy = cast<ShapedType>(r1.getType());
+    ArrayRef<int64_t> sndReshapeOutShape = sndReshapeOutTy.getShape();
+    const size_t sndReshapeOutRank = sndReshapeOutTy.getRank();
+    if (sndReshapeOutRank != 4) {
+      return reportFailureForCRDMode("output rank of second reshape is not 4");
+    }
+
+    if (sndReshapeInShape[0] != sndReshapeOutShape[0] ||
+        sndReshapeInShape[1] != sndReshapeOutShape[1] ||
+        sndReshapeInShape[2] * sndReshapeInShape[3] != sndReshapeOutShape[2] ||
+        sndReshapeInShape[4] * sndReshapeInShape[5] != sndReshapeOutShape[3]) {
+      return reportFailureForCRDMode("unexpected second reshape result shape");
+    }
+
+    Location fusedLocation = FusedLoc::get(
+        reshapeOp->getContext(), {r0->getLoc(), t->getLoc(), r1->getLoc()});
+
+    return DepthToSpaceRecompositionResult{
+        /*input=*/r0.getOperand(0), blocksize, /*mode=*/"CRD", fusedLocation};
+  }
+
+  static std::nullopt_t reportFailureForCRDMode(std::string msg) {
+    // This logging can be disabled if not needed.
+    LLVM_DEBUG(llvm::dbgs() << "DepthToSpace [CRD] failure: " << msg << "\n");
+    return std::nullopt;
+  }
+};
+
 struct RecomposeQLinearMatMulFromQuantizeLinearPattern
     : public OpRewritePattern<ONNXQuantizeLinearOp> {
   using OpRewritePattern<ONNXQuantizeLinearOp>::OpRewritePattern;
@@ -815,6 +960,11 @@ void RecomposeONNXToONNXPass::runOnOperation() {
     return true;
   });
 
+  // Recompose DepthToSpace, starting from the final reshape op.
+  target.addDynamicallyLegalOp<ONNXReshapeOp>([](ONNXReshapeOp op) {
+    return !RecomposeDepthToSpaceCRD::matchDepthToSpaceCRDPattern(op);
+  });
+
   // AMD Disabled
   // // Recompose QLinearMatMul, starting from QuantizeLinear.
   // // Pattern: DequantizeLinear + MatMul + QuantizeLinear.
@@ -841,6 +991,7 @@ void onnx_mlir::getRecomposeONNXToONNXPatterns(
   MLIRContext *context = patterns.getContext();
   patterns.insert<RecomposeGeluFromMulPattern>(context);
   patterns.insert<RecomposeLayerNormFromMulPattern>(context);
+  patterns.insert<RecomposeDepthToSpaceCRD>(context);
   // AMD Disabled as downstream has no special support for it
   // patterns.insert<RecomposeQLinearMatMulFromQuantizeLinearPattern>(context);
 }
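For intuition, the chain matched above performs exactly the CRD index shuffle that ONNX defines for DepthToSpace: output[n][c][h*B + bh][w*B + bw] = input[n][c*B*B + bh*B + bw][h][w]. The standalone C++ sketch below (not part of this commit; the function name and flat NCHW indexing are illustrative) spells out that equivalence:

// Standalone reference for DepthToSpace in CRD mode (illustrative only).
// Input is NCHW with C divisible by B*B; output is N x C/(B*B) x H*B x W*B.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<float> depthToSpaceCRD(const std::vector<float> &in, int64_t N,
    int64_t C, int64_t H, int64_t W, int64_t B) {
  assert(C % (B * B) == 0 && "channel count must be divisible by B*B");
  const int64_t outC = C / (B * B);
  std::vector<float> out(N * C * H * W);
  for (int64_t n = 0; n < N; ++n)
    for (int64_t c = 0; c < outC; ++c)
      for (int64_t h = 0; h < H; ++h)
        for (int64_t bh = 0; bh < B; ++bh)
          for (int64_t w = 0; w < W; ++w)
            for (int64_t bw = 0; bw < B; ++bw) {
              // CRD: the block offsets (bh, bw) are the fastest-varying part
              // of the input channel index, inC = c*B*B + bh*B + bw. This is
              // exactly what reshape(N,C',B,B,H,W) encodes in dims 2 and 3.
              const int64_t inC = c * B * B + bh * B + bw;
              const int64_t outH = h * B + bh;
              const int64_t outW = w * B + bw;
              out[((n * outC + c) * (H * B) + outH) * (W * B) + outW] =
                  in[((n * C + inC) * H + h) * W + w];
            }
  return out;
}

In DCR mode, by contrast, the block offsets occupy the slowest-varying channel dimensions (inC = (bh * B + bw) * outC + c), which corresponds to a different reshape/transpose chain, so this pattern deliberately leaves DCR-style chains alone.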

test/mlir/onnx/onnx_recompose.mlir

Lines changed: 76 additions & 0 deletions
@@ -699,3 +699,79 @@ func.func @test_gelu_erf_two_adds(%arg0: tensor<?x?x3072xf32>, %arg1: tensor<307
 // CHECK: [[VAR_3_:%.+]] = "onnx.MatMul"([[VAR_2_]], [[PARAM_1_]]) : (tensor<?x?x3072xf32>, tensor<3072x768xf32>) -> tensor<?x?x768xf32>
 // CHECK: return [[VAR_3_]] : tensor<?x?x768xf32>
 // CHECK: }
+
+// -----
+
+func.func @test_depth_to_space_crd(%arg0: tensor<1x128x540x960xf32>) -> tensor<1x32x1080x1920xf32> {
+  %0 = onnx.Constant dense<[-1, 32, 2, 2, 540, 960]> : tensor<6xi64>
+  %1 = onnx.Constant dense<[-1, 32, 1080, 1920]> : tensor<4xi64>
+  %2 = "onnx.Reshape"(%arg0, %0) {allowzero = 0 : si64} : (tensor<1x128x540x960xf32>, tensor<6xi64>) -> tensor<1x32x2x2x540x960xf32>
+  %3 = "onnx.Transpose"(%2) {perm = [0, 1, 4, 2, 5, 3]} : (tensor<1x32x2x2x540x960xf32>) -> tensor<1x32x540x2x960x2xf32>
+  %4 = "onnx.Reshape"(%3, %1) {allowzero = 0 : si64} : (tensor<1x32x540x2x960x2xf32>, tensor<4xi64>) -> tensor<1x32x1080x1920xf32>
+  return %4 : tensor<1x32x1080x1920xf32>
+}
+// CHECK-LABEL: func.func @test_depth_to_space_crd
+// CHECK-SAME:  (%[[PARAM_1:.+]]: tensor<1x128x540x960xf32>) -> tensor<1x32x1080x1920xf32>
+// CHECK:       %[[DTS:.+]] = "onnx.DepthToSpace"(%[[PARAM_1]]) {blocksize = 2 : si64, mode = "CRD"} : (tensor<1x128x540x960xf32>) -> tensor<1x32x1080x1920xf32>
+// CHECK:       return %[[DTS]] : tensor<1x32x1080x1920xf32>
+// CHECK:       }
+
+// -----
+
+func.func @test_depth_to_space_crd_missing_transpose_perm(%arg0: tensor<1x128x540x960xf32>) -> tensor<1x32x1080x1920xf32> {
+  %0 = onnx.Constant dense<[-1, 32, 2, 2, 540, 960]> : tensor<6xi64>
+  %1 = onnx.Constant dense<[-1, 32, 1080, 1920]> : tensor<4xi64>
+  %2 = "onnx.Reshape"(%arg0, %0) {allowzero = 0 : si64} : (tensor<1x128x540x960xf32>, tensor<6xi64>) -> tensor<1x32x2x2x540x960xf32>
+  %3 = "onnx.Transpose"(%2) : (tensor<1x32x2x2x540x960xf32>) -> tensor<1x32x540x2x960x2xf32>
+  %4 = "onnx.Reshape"(%3, %1) {allowzero = 0 : si64} : (tensor<1x32x540x2x960x2xf32>, tensor<4xi64>) -> tensor<1x32x1080x1920xf32>
+  return %4 : tensor<1x32x1080x1920xf32>
+}
+// CHECK-NOT: onnx.DepthToSpace
+
+// -----
+
+func.func @test_depth_to_space_crd_unexpected_first_reshape_result(%arg0: tensor<1x128x540x960xf32>) -> tensor<1x32x540x3840xf32> {
+  %0 = onnx.Constant dense<[-1, 32, 1, 4, 540, 960]> : tensor<6xi64>
+  %1 = onnx.Constant dense<[-1, 32, 540, 3840]> : tensor<4xi64>
+  %2 = "onnx.Reshape"(%arg0, %0) {allowzero = 0 : si64} : (tensor<1x128x540x960xf32>, tensor<6xi64>) -> tensor<1x32x1x4x540x960xf32>
+  %3 = "onnx.Transpose"(%2) {perm = [0, 1, 4, 2, 5, 3]} : (tensor<1x32x1x4x540x960xf32>) -> tensor<1x32x540x1x960x4xf32>
+  %4 = "onnx.Reshape"(%3, %1) {allowzero = 0 : si64} : (tensor<1x32x540x1x960x4xf32>, tensor<4xi64>) -> tensor<1x32x540x3840xf32>
+  return %4 : tensor<1x32x540x3840xf32>
+}
+// CHECK-NOT: onnx.DepthToSpace
+
+// -----
+
+func.func @test_depth_to_space_crd_unexpected_perm(%arg0: tensor<1x128x540x960xf32>) -> tensor<1x32x1080x1920xf32> {
+  %0 = onnx.Constant dense<[-1, 32, 2, 2, 540, 960]> : tensor<6xi64>
+  %1 = onnx.Constant dense<[-1, 32, 1080, 1920]> : tensor<4xi64>
+  %2 = "onnx.Reshape"(%arg0, %0) {allowzero = 0 : si64} : (tensor<1x128x540x960xf32>, tensor<6xi64>) -> tensor<1x32x2x2x540x960xf32>
+  %3 = "onnx.Transpose"(%2) {perm = [0, 1, 4, 3, 5, 2]} : (tensor<1x32x2x2x540x960xf32>) -> tensor<1x32x540x2x960x2xf32>
+  %4 = "onnx.Reshape"(%3, %1) {allowzero = 0 : si64} : (tensor<1x32x540x2x960x2xf32>, tensor<4xi64>) -> tensor<1x32x1080x1920xf32>
+  return %4 : tensor<1x32x1080x1920xf32>
+}
+// CHECK-NOT: onnx.DepthToSpace
+
+// -----
+
+func.func @test_depth_to_space_crd_unexpected_second_reshape_result(%arg0: tensor<1x128x540x960xf32>) -> tensor<1x1x32x1080x1920xf32> {
+  %0 = onnx.Constant dense<[-1, 32, 2, 2, 540, 960]> : tensor<6xi64>
+  %1 = onnx.Constant dense<[-1, 1, 32, 1080, 1920]> : tensor<5xi64>
+  %2 = "onnx.Reshape"(%arg0, %0) {allowzero = 0 : si64} : (tensor<1x128x540x960xf32>, tensor<6xi64>) -> tensor<1x32x2x2x540x960xf32>
+  %3 = "onnx.Transpose"(%2) {perm = [0, 1, 4, 2, 5, 3]} : (tensor<1x32x2x2x540x960xf32>) -> tensor<1x32x540x2x960x2xf32>
+  %4 = "onnx.Reshape"(%3, %1) {allowzero = 0 : si64} : (tensor<1x32x540x2x960x2xf32>, tensor<5xi64>) -> tensor<1x1x32x1080x1920xf32>
+  return %4 : tensor<1x1x32x1080x1920xf32>
+}
+// CHECK-NOT: onnx.DepthToSpace
+
+// -----
+
+func.func @test_depth_to_space_crd_not_static_shapes(%arg0: tensor<*xf32>) -> tensor<*xf32> {
+  %0 = onnx.Constant dense<[-1, 32, 2, 2, 540, 960]> : tensor<6xi64>
+  %1 = onnx.Constant dense<[-1, 32, 1080, 1920]> : tensor<4xi64>
+  %2 = "onnx.Reshape"(%arg0, %0) {allowzero = 0 : si64} : (tensor<*xf32>, tensor<6xi64>) -> tensor<*xf32>
+  %3 = "onnx.Transpose"(%2) {perm = [0, 1, 4, 2, 5, 3]} : (tensor<*xf32>) -> tensor<*xf32>
+  %4 = "onnx.Reshape"(%3, %1) {allowzero = 0 : si64} : (tensor<*xf32>, tensor<4xi64>) -> tensor<*xf32>
+  return %4 : tensor<*xf32>
+}
+// CHECK-NOT: onnx.DepthToSpace
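One reading note: onnx_recompose.mlir is a lit/FileCheck test, and the RUN line that drives it sits at the top of the file, outside this hunk. A minimal sketch of such an invocation, assuming the pass is registered as --recompose-onnx on onnx-mlir-opt (the exact flags are not shown in this diff):

// RUN: onnx-mlir-opt --recompose-onnx %s -split-input-file | FileCheck %s

With -split-input-file, each // ----- separator starts an independent test case, which is why every negative test above can assert a bare CHECK-NOT: onnx.DepthToSpace on its own.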
