From 92526ea886a160e5b6c21554621769d78f0a4fdb Mon Sep 17 00:00:00 2001 From: Quinn Dawkins Date: Thu, 7 Nov 2024 18:32:50 -0500 Subject: [PATCH] Integrate llvm-project@b358f21 (#19066) - llvm::Type->getPointerTo was deprecated and replaced with llvm::PointerType::get - affine.delinearize_index now takes mixed attr/value operands Includes two fixes - SCF::TileAndFuseConsumerOfSlice was incorrectly determining all DPS ops to be tilable - A new PackOp + CastOp folder was dropping lowering configs --- .../plugins/target/LLVMCPU/LibraryBuilder.cpp | 139 ++++++++++-------- .../GPU/test/gpu_distribute_forall.mlir | 17 +-- .../GPU/test/vector_reduction_to_gpu.mlir | 2 +- ...optimize_tensor_insert_extract_slices.mlir | 2 +- .../test/reconcile_translation_info.mlir | 5 +- .../Common/test/transform_flatten_forall.mlir | 2 +- .../test/distribute_lane_forall.mlir | 6 +- .../test/transform_fuse_forall.mlir | 6 +- .../test/distribute_mma_to_lanes.mlir | 15 +- .../test/ROCDL/pipeline_tile_and_fuse.mlir | 2 +- .../pipeline_vector_distribute_gfx940.mlir | 2 +- third_party/llvm-project | 2 +- 12 files changed, 97 insertions(+), 103 deletions(-) diff --git a/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp b/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp index a4402ed6db0b..d0e8786c274b 100644 --- a/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp +++ b/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp @@ -6,6 +6,7 @@ #include "compiler/plugins/target/LLVMCPU/LibraryBuilder.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" // ============================================================================= @@ -42,13 +43,14 @@ static llvm::StructType *makeImportTableType(llvm::LLVMContext &context) { } auto *i32Type = llvm::IntegerType::getInt32Ty(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); - auto *type = llvm::StructType::create(context, - { - i32Type, - i8PtrType->getPointerTo(), - }, - "iree_hal_executable_import_table_v0_t", - /*isPacked=*/false); + auto *type = + llvm::StructType::create(context, + { + i32Type, + llvm::PointerType::get(i8PtrType, 0), + }, + "iree_hal_executable_import_table_v0_t", + /*isPacked=*/false); return type; } @@ -100,13 +102,14 @@ makeDispatchFunctionType(llvm::LLVMContext &context) { auto *dispatchStateType = makeDispatchStateType(context); auto *workgroupStateType = makeWorkgroupStateType(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); - return llvm::FunctionType::get(i32Type, - { - environmentType->getPointerTo(), - dispatchStateType->getPointerTo(), - workgroupStateType->getPointerTo(), - }, - /*isVarArg=*/false); + return llvm::FunctionType::get( + i32Type, + { + llvm::PointerType::get(environmentType, 0), + llvm::PointerType::get(dispatchStateType, 0), + llvm::PointerType::get(workgroupStateType, 0), + }, + /*isVarArg=*/false); } // %struct.iree_hal_executable_dispatch_attrs_v0_t = type { @@ -181,15 +184,15 @@ makeStageLocationTableType(llvm::LLVMContext &context) { auto *i32Type = llvm::IntegerType::getInt32Ty(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); auto *sourceLocationType = makeSourceLocationType(context); - auto *type = - llvm::StructType::create(context, - { - i32Type, - i8PtrType->getPointerTo(), - sourceLocationType->getPointerTo(), - }, - "iree_hal_executable_stage_location_table_v0_t", - /*isPacked=*/false); + auto *type = llvm::StructType::create( + context, + { + i32Type, + llvm::PointerType::get(i8PtrType, 0), + llvm::PointerType::get(sourceLocationType, 0), + }, + 
"iree_hal_executable_stage_location_table_v0_t", + /*isPacked=*/false); return type; } @@ -209,6 +212,8 @@ static llvm::StructType *makeExportTableType(llvm::LLVMContext &context) { } auto *i32Type = llvm::IntegerType::getInt32Ty(context); auto *dispatchFunctionType = makeDispatchFunctionType(context); + auto *dispatchFunctionPointerType = + llvm::PointerType::get(dispatchFunctionType, 0); auto *dispatchAttrsType = makeDispatchAttrsType(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); auto *sourceLocationType = makeSourceLocationType(context); @@ -217,12 +222,12 @@ static llvm::StructType *makeExportTableType(llvm::LLVMContext &context) { context, { i32Type, - dispatchFunctionType->getPointerTo()->getPointerTo(), - dispatchAttrsType->getPointerTo(), - i8PtrType->getPointerTo(), - i8PtrType->getPointerTo(), - sourceLocationType->getPointerTo(), - stageLocationTableType->getPointerTo(), + llvm::PointerType::get(dispatchFunctionPointerType, 0), + llvm::PointerType::get(dispatchAttrsType, 0), + llvm::PointerType::get(i8PtrType, 0), + llvm::PointerType::get(i8PtrType, 0), + llvm::PointerType::get(sourceLocationType, 0), + llvm::PointerType::get(stageLocationTableType, 0), }, "iree_hal_executable_export_table_v0_t", /*isPacked=*/false); @@ -288,7 +293,7 @@ static llvm::StructType *makeSourceTableType(llvm::LLVMContext &context) { llvm::StructType::create(context, { i32Type, - sourceFileType->getPointerTo(), + llvm::PointerType::get(sourceFileType, 0), }, "iree_hal_executable_source_file_table_v0_t", /*isPacked=*/false); @@ -335,16 +340,17 @@ static llvm::StructType *makeLibraryType(llvm::StructType *libraryHeaderType) { auto *exportTableType = makeExportTableType(context); auto *constantTableType = makeConstantTableType(context); auto *sourceTableType = makeSourceTableType(context); - auto *type = llvm::StructType::create(context, - { - libraryHeaderType->getPointerTo(), - importTableType, - exportTableType, - constantTableType, - sourceTableType, - }, - "iree_hal_executable_library_v0_t", - /*isPacked=*/false); + auto *type = + llvm::StructType::create(context, + { + llvm::PointerType::get(libraryHeaderType, 0), + importTableType, + exportTableType, + constantTableType, + sourceTableType, + }, + "iree_hal_executable_library_v0_t", + /*isPacked=*/false); return type; } @@ -379,7 +385,7 @@ static llvm::Constant *createStringConstantOrNull(StringRef value, llvm::Module *module) { if (value.empty()) { auto i8Type = llvm::IntegerType::getInt8Ty(module->getContext()); - return llvm::ConstantPointerNull::get(i8Type->getPointerTo()); + return llvm::ConstantPointerNull::get(llvm::PointerType::get(i8Type, 0)); } return createStringConstant(value, module); } @@ -427,13 +433,14 @@ static llvm::Constant *createArrayConstant(StringRef name, llvm::Function *LibraryBuilder::build(StringRef queryFuncName) { auto &context = module->getContext(); auto *i32Type = llvm::IntegerType::getInt32Ty(context); - auto *environmentType = makeEnvironmentType(context)->getPointerTo(); + auto *environmentStructType = makeEnvironmentType(context); + auto *environmentType = llvm::PointerType::get(environmentStructType, 0); auto *libraryHeaderType = makeLibraryHeaderType(context); // %struct.iree_hal_executable_library_header_t** // @iree_hal_library_query(i32, %struct.iree_hal_executable_environment_v0_t*) auto *queryFuncType = - llvm::FunctionType::get(libraryHeaderType->getPointerTo(), + llvm::FunctionType::get(llvm::PointerType::get(libraryHeaderType, 0), { i32Type, environmentType, @@ -454,8 +461,10 @@ 
llvm::Function *LibraryBuilder::build(StringRef queryFuncName) { builder.CreateICmpEQ(func->getArg(0), llvm::ConstantInt::get( i32Type, static_cast(Version::LATEST))), - builder.CreatePointerCast(v0, libraryHeaderType->getPointerTo()), - llvm::ConstantPointerNull::get(libraryHeaderType->getPointerTo()))); + builder.CreatePointerCast(v0, + llvm::PointerType::get(libraryHeaderType, 0)), + llvm::ConstantPointerNull::get( + llvm::PointerType::get(libraryHeaderType, 0)))); return func; } @@ -467,7 +476,7 @@ LibraryBuilder::buildLibraryV0ImportTable(std::string libraryName) { auto *i8Type = llvm::IntegerType::getInt8Ty(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); llvm::Constant *symbolNames = - llvm::Constant::getNullValue(i8Type->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i8Type, 0)); if (!imports.empty()) { SmallVector symbolNameValues; for (auto &import : imports) { @@ -476,9 +485,9 @@ LibraryBuilder::buildLibraryV0ImportTable(std::string libraryName) { symbolName = "?" + symbolName; symbolNameValues.push_back(createStringConstant(symbolName, module)); } - symbolNames = - createArrayConstant(libraryName + "_import_names", - i8Type->getPointerTo(), symbolNameValues, module); + symbolNames = createArrayConstant(libraryName + "_import_names", + llvm::PointerType::get(i8Type, 0), + symbolNameValues, module); } return llvm::ConstantStruct::get( importTableType, { @@ -507,12 +516,12 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { for (auto dispatch : exports) exportPtrValues.push_back(dispatch.func); llvm::Constant *exportPtrs = createArrayConstant( - libraryName + "_funcs", dispatchFunctionType->getPointerTo(), + libraryName + "_funcs", llvm::PointerType::get(dispatchFunctionType, 0), exportPtrValues, module); // iree_hal_executable_export_table_v0_t::attrs llvm::Constant *exportAttrs = - llvm::Constant::getNullValue(i32Type->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i32Type, 0)); bool hasNonDefaultAttrs = llvm::any_of(exports, [](const auto &dispatch) { return !dispatch.attrs.isDefault(); }); @@ -557,19 +566,20 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { // iree_hal_executable_export_table_v0_t::names llvm::Constant *exportNames = - llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i8Type, 0)); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { SmallVector exportNameValues; for (auto dispatch : exports) exportNameValues.push_back(createStringConstant(dispatch.name, module)); - exportNames = - createArrayConstant(libraryName + "_names", i8Type->getPointerTo(), - exportNameValues, module); + exportNames = createArrayConstant(libraryName + "_names", + llvm::PointerType::get(i8Type, 0), + exportNameValues, module); } // iree_hal_executable_export_table_v0_t::tags + auto *i8PtrType = llvm::PointerType::get(i8Type, 0); llvm::Constant *exportTags = - llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i8PtrType, 0)); bool hasAnyTags = llvm::any_of( exports, [](auto &dispatch) { return !dispatch.tag.empty(); }); if (mode == Mode::INCLUDE_REFLECTION_ATTRS && hasAnyTags) { @@ -577,13 +587,14 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { for (auto dispatch : exports) exportTagValues.push_back( createStringConstantOrNull(dispatch.tag, module)); - exportTags = createArrayConstant( - libraryName + "_tags", 
i8Type->getPointerTo(), exportTagValues, module); + exportTags = createArrayConstant(libraryName + "_tags", + llvm::PointerType::get(i8Type, 0), + exportTagValues, module); } // iree_hal_executable_export_table_v0_t::source_locations - llvm::Constant *exportSourceLocations = - llvm::Constant::getNullValue(sourceLocationType->getPointerTo()); + llvm::Constant *exportSourceLocations = llvm::Constant::getNullValue( + llvm::PointerType::get(sourceLocationType, 0)); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { SmallVector exportSourceLocationValues; for (auto dispatch : exports) { @@ -605,8 +616,8 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { } // iree_hal_executable_export_table_v0_t::stage_locations - llvm::Constant *exportStageLocations = - llvm::Constant::getNullValue(stageLocationTableType->getPointerTo()); + llvm::Constant *exportStageLocations = llvm::Constant::getNullValue( + llvm::PointerType::get(stageLocationTableType, 0)); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { SmallVector exportStageTableValues; for (auto dispatch : exports) { @@ -628,7 +639,7 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { } llvm::Constant *stageNamesPtr = createArrayConstant( libraryName + "_" + dispatch.name + "_stage_names", - i8Type->getPointerTo(), exportStageNameValues, module); + llvm::PointerType::get(i8Type, 0), exportStageNameValues, module); llvm::Constant *sourceLocationsPtr = createArrayConstant( libraryName + "_" + dispatch.name + "_stage_source_locations", sourceLocationType, exportSourceLocationValues, module); @@ -688,7 +699,7 @@ LibraryBuilder::buildLibraryV0SourceTable(std::string libraryName) { auto *sourceTableType = makeSourceTableType(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); llvm::Constant *sourceFilesValue = - llvm::Constant::getNullValue(sourceFileType->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(sourceFileType, 0)); if (!sourceFiles.empty()) { SmallVector sourceFileValues; for (auto &sourceFile : sourceFiles) { diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir index fab5da8f4595..72cf235b2b08 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir @@ -20,8 +20,7 @@ func.func @distribute_thread_forall(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<(d0)[s0, s1, s2] -> (d0 + s0 + s1 * 64 + s2 * 128)>(%[[I]]) // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = affine.delinearize_index %[[LINID]] into (%c1024) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] // ----- @@ -44,8 +43,7 @@ func.func @distribute_warp_forall(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<(d0)[s0, s1, s2] -> (d0 + s1 * 2 + s2 * 4 + s0 floordiv 32)>(%[[I]]) // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = affine.delinearize_index %[[LINID]] into (%c32) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] // ----- @@ -85,8 +83,7 @@ func.func @distribute_thread_forall_drop_for_loop(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<()[s0, s1, s2] -> (s0 + s1 * 64 + s2 * 128)> // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = 
affine.delinearize_index %[[LINID]] into (%c128) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] // ----- @@ -102,7 +99,6 @@ func.func @distribute_thread_forall_single_thread(%out : memref) } // CHECK-LABEL: func @distribute_thread_forall_single_thread -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[TX:.+]] = gpu.thread_id x // CHECK-DAG: %[[TY:.+]] = gpu.thread_id y // CHECK-DAG: %[[TZ:.+]] = gpu.thread_id z @@ -110,7 +106,7 @@ func.func @distribute_thread_forall_single_thread(%out : memref) // CHECK-SAME: affine_map<()[s0, s1, s2] -> (s0 + s1 * 64 + s2 * 128)> // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] // CHECK: scf.for %[[I:.+]] = %[[LINID]] to %c1 step %c128 { -// CHECK: memref.store {{.*}}[%[[C0]]] +// CHECK: memref.store {{.*}}[%[[I]]] // ----- @@ -133,7 +129,7 @@ func.func @distribute_thread_forall_multi_dim(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<(d0)[s0, s1, s2] -> (d0 + s0 + s1 * 64 + s2 * 128)>(%[[I]]) // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]]:3 = affine.delinearize_index %[[LINID]] into (%c16, %c8, %c4) : index +// CHECK: %[[DELIN:.+]]:3 = affine.delinearize_index %[[LINID]] into (16, 8, 4) : index // CHECK: memref.store {{.*}}[%[[DELIN]]#0, %[[DELIN]]#1, %[[DELIN]]#2] @@ -157,5 +153,4 @@ func.func @distribute_thread_forall_small_workgroup(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<()[s0, s1, s2] -> (s0 + s1 * 7 + s2 * 7)> // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = affine.delinearize_index %[[LINID]] into (%c7) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir index e365a2b9569c..8841b7fa73d3 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir @@ -302,6 +302,6 @@ module { // CHECK-LABEL: func @simple_nd_write( // CHECK: %[[RD:.+]] = vector.transfer_read {{.*}} vector<1x128xf32> -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %{{.*}} into (%c4, %c8) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %{{.*}} into (4, 8) : index, index // CHECK: %[[INNER_ID:.+]] = affine.apply #[[$MAP]]()[%[[IDS]]#1] // CHECK: vector.transfer_write %[[RD]], %{{.*}}[%[[IDS]]#0, %[[INNER_ID]]] {{.*}} : vector<1x128xf32> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir index dabd2854a8a8..fd89230e161c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir @@ -200,7 +200,7 @@ func.func @batch_matmul_with_padding_strategy(%arg0: tensor<1x?x1280xf16>, %arg1 %4 = tensor.empty() : tensor<1x64x128xf16> %5 = vector.transfer_write %cst, %4[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x128xf16>, tensor<1x64x128xf16> %6 = scf.for %arg2 = %c0 to %c20 step %c1 iter_args(%arg3 = %5) -> (tensor<1x64x128xf16>) { - %7 = affine.delinearize_index %arg2 into (%c20) : index + %7 = affine.delinearize_index %arg2 into (20) : index %8 = affine.apply #map()[%7] 
%extracted_slice_1 = tensor.extract_slice %arg1[0, %8, 0] [1, 64, 128] [1, 1, 1] : tensor<1x1280x128xf16> to tensor<1x64x128xf16> %extracted_slice_2 = tensor.extract_slice %arg0[0, 0, %8] [1, %3, 64] [1, 1, 1] : tensor<1x?x1280xf16> to tensor<1x?x64xf16> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir index c7e95db23cd2..f00d249574f9 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir @@ -377,14 +377,11 @@ hal.executable private @scf_forall_4D_static_interchange { // CHECK-DAG: %[[C160:.+]] = arith.constant 160 : index // CHECK: hal.return %[[C6]], %[[C7]], %[[C160]] // CHECK: func @scf_forall_4D_static_interchange() -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index -// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index // CHECK-DAG: %[[WG_ID_X:.+]] = hal.interface.workgroup.id[0] // CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1] // CHECK-DAG: %[[WG_ID_Z:.+]] = hal.interface.workgroup.id[2] // CHECK-NOT: scf.forall -// CHECK: %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[WG_ID_Z]] into (%[[C5]], %[[C8]], %[[C4]]) +// CHECK: %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[WG_ID_Z]] into (5, 8, 4) // CHECK: %[[I:.+]] = affine.apply #[[MAP0]]()[%[[DELINEARIZE]]#0] // CHECK: %[[J:.+]] = affine.apply #[[MAP1]]()[%[[WG_ID_X]]] // CHECK: %[[K:.+]] = affine.apply #[[MAP2]]()[%[[WG_ID_Y]]] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir index cd6ab2bb90ba..c4a21b0073e0 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir @@ -32,7 +32,7 @@ module attributes { transform.with_named_sequence } { // CHECK-LABEL: func @flatten_forall_thread_mapping // CHECK: scf.forall (%[[FLAT_ID:.+]]) in (64) -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (%c4, %c16) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (4, 16) : index, index // CHECK-DAG: %[[IDX:.+]] = affine.apply #[[$MAP]](%[[IDS]]#0) // CHECK-DAG: %[[IDY:.+]] = affine.apply #[[$MAP1]](%[[IDS]]#1) // CHECK: } {mapping = [#gpu.thread]} diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir index ecd704c77525..5e3ef323f7e0 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir @@ -4,10 +4,8 @@ #map1 = affine_map<(d0) -> (d0 * 16)> module { func.func @distribute_lane_forall(%arg0: memref<128x128xf32>, %dest: memref<128x128xf32>) { - %c4 = arith.constant 4 : index - %c16 = arith.constant 16 : index scf.forall (%id) in (64) { - %ids:2 = affine.delinearize_index %id into (%c4, %c16) : index, index + %ids:2 = affine.delinearize_index %id into (4, 16) : index, index %3 = affine.apply #map(%ids#0) %4 = affine.apply #map1(%ids#1) %in_view = memref.subview %arg0[%3, %4] [32, 8] [1, 1] : memref<128x128xf32> to memref<32x8xf32, 
strided<[128, 1], offset: ?>> @@ -32,5 +30,5 @@ module attributes { transform.with_named_sequence } { // CHECK-LABEL: func @distribute_lane_forall // CHECK: %[[LANE_ID:.+]] = gpu.lane_id // CHECK-NOT: scf.forall -// CHECK: affine.delinearize_index %[[LANE_ID]] into (%c4, %c16) : index, index +// CHECK: affine.delinearize_index %[[LANE_ID]] into (4, 16) : index, index // CHECK: linalg.copy diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir index 780e102ec73a..e317674d6c2e 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir @@ -52,7 +52,7 @@ module attributes { transform.with_named_sequence } { // CHECK: ^bb0(%[[INTERMEDIATE:.+]]: tensor<128x128xf32>): // CHECK: %[[LOOP:.+]] = scf.for %[[I:.+]] = %c0 to %c64{{.*}} step %c64{{.*}} iter_args(%[[ITER:.+]] = %[[INTERMEDIATE]]) -> (tensor<128x128xf32>) // CHECK: %[[LINEARID:.+]] = affine.apply #[[$MAP2]](%[[I]], %[[IDX]], %[[IDY]]) -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINEARID]] into (%c64, %c1) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINEARID]] into (64, 1) : index, index // CHECK: %[[INID0:.+]] = affine.apply #[[$MAP3]](%[[IDS]]#0) // CHECK: %[[INSLICE0:.+]] = tensor.extract_slice %[[ARG0]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1] : tensor<128x128xf32> to tensor<2x128xf32> // CHECK: %[[INSLICE1:.+]] = tensor.extract_slice %[[ITER]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1] : tensor<128x128xf32> to tensor<2x128xf32> @@ -251,7 +251,7 @@ module attributes { transform.with_named_sequence } { // CHECK: %[[BARRIER:.+]] = iree_gpu.barrier_region ins(%[[ALLOC]] : tensor<128x128xf32>) // CHECK: %[[LOOP:.+]] = scf.for %[[I:.+]] = %c0 to %c64{{.*}} step %c64{{.*}} iter_args(%[[ITER:.+]] = %{{.*}}) -> (tensor<128x128xf32>) // CHECK: %[[FLAT_ID:.+]] = affine.apply #[[$MAP4]](%[[I]], %[[L_IDY]], %[[L_IDX]], %[[W_IDX]], %[[W_IDY]]) -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (%c64, %c1) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (64, 1) : index, index // CHECK: %[[IDX:.+]] = affine.apply #[[$MAP5]](%[[IDS]]#0) // CHECK: %[[COPY:.+]] = linalg.copy // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[COPY]] into %[[ITER]][%[[IDX]], %[[IDS]]#1] [2, 128] @@ -311,7 +311,7 @@ module attributes { transform.with_named_sequence } { // CHECK: iree_gpu.barrier_region ins(%[[ALLOC]] // CHECK: %[[LINEARID:.+]] = affine.apply #[[$MAP1]](%[[IDX]], %[[IDY]]) // CHECK: scf.for %[[I:.+]] = %[[LINEARID]] to %c32{{.*}} step %c64{{.*}} -// CHECK: %[[IDS:.+]] = affine.delinearize_index %[[I]] into (%c32) : index +// CHECK: %[[IDS:.+]] = affine.delinearize_index %[[I]] into (32) : index // CHECK: scf.yield // CHECK: unroll_loop // CHECK: } {mapping = [#gpu.thread, #gpu.thread]} diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir index 957ca10ffd80..4a949b671442 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir @@ -393,11 +393,9 @@ func.func 
@data_tiled_1x1x1_tensor_multi_mma(%lhs: tensor<1x1x4x16xf32>, %rhs: t // CHECK-SAME: %[[LHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[RHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[ACC:[A-Za-z0-9]+]] -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index // CHECK: scf.forall (%[[THREAD_ID:.+]]) in (64) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x4x16x4xf32>) // CHECK: %[[ID_CLAMPED:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]]) -// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C4]], %[[C16]]) +// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (4, 16) // CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]][0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1] [1, 1, 1, 1] [1, 1, 1, 1] // CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]][0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1] [1, 1, 1, 1] [1, 1, 1, 1] // CHECK-DAG: %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC_ARG]] @@ -432,11 +430,9 @@ func.func @data_tiled_2x2x4_tensor_multi_mma_unrolled(%lhs: tensor<1x1x2x4x16x4x // CHECK-SAME: %[[LHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[RHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[ACC:[A-Za-z0-9]+]] -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index // CHECK: scf.forall (%[[THREAD_ID:.+]]) in (64) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x2x2x4x16x4xf32>) // CHECK: %[[ID_CLAMPED:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]]) -// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C4]], %[[C16]]) +// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (4, 16) // CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]] // CHECK-SAME: [0, 0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1] // CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]] @@ -474,18 +470,15 @@ func.func @data_tiled_2x2x4_tensor_multi_mma_unrolled_to_subgroups(%lhs: tensor< // CHECK-SAME: %[[LHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[RHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[ACC:[A-Za-z0-9]+]] -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index // CHECK: scf.forall (%[[THREAD_ID:.+]]) in (256) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x2x2x4x16x4xf32>) // CHECK: %[[ID_CLAMPED_128:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]]) -// CHECK-DAG: %[[IN_IDS:.+]]:3 = affine.delinearize_index %[[ID_CLAMPED_128]] into (%[[C2]], %[[C4]], %[[C16]]) +// CHECK-DAG: %[[IN_IDS:.+]]:3 = affine.delinearize_index %[[ID_CLAMPED_128]] into (2, 4, 16) // CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]] // CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, 0] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1] // CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]] // CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, 0] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1] // CHECK: %[[ID_CLAMPED_256:.+]] = affine.apply #[[$MAP1]](%[[THREAD_ID]]) -// CHECK-DAG: %[[ACC_IDS:.+]]:4 = affine.delinearize_index %[[ID_CLAMPED_256]] into (%[[C2]], %[[C2]], %[[C4]], %[[C16]]) +// CHECK-DAG: %[[ACC_IDS:.+]]:4 = affine.delinearize_index %[[ID_CLAMPED_256]] into (2, 2, 4, 16) // CHECK-DAG: %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC_ARG]] // CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, 0] [1, 1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1] // CHECK: %[[MMA:.+]] = 
iree_gpu.multi_mma %[[LHS_SLICE]], %[[RHS_SLICE]], %[[ACC_SLICE]] diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir index e1805bbcd04c..e2d61a3cea4a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir @@ -553,7 +553,7 @@ hal.executable public @main { // CHECK-DAG: %[[IDY:.+]] = gpu.thread_id y // CHECK-DAG: %[[IDZ:.+]] = gpu.thread_id z // CHECK: %[[LINID0:.+]] = affine.apply #[[$MAP]]()[%[[IDX]], %[[IDY]], %[[IDZ]]] -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINID0:.+]] into (%c4, %c8) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINID0:.+]] into (4, 8) : index, index // CHECK: %[[LINID1:.+]] = affine.apply #[[$MAP1]]()[%[[IDS]]#0, %[[IDS]]#1] // CHECK: scf.forall ({{.*}}) in (32, 98) { // CHECK: scf.for %{{.*}} = %c0 to %c256 step %c4 {{.*}} -> (vector<1x4xf32>) diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir index 960fe0b9938c..c4de8342c9f1 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir @@ -208,7 +208,7 @@ hal.executable @matmul_multiple_k { // CHECK-LABEL: func.func @matmul_multiple_k // CHECK: scf.for %[[IV:.+]] = %c0 to %c2048 step %c1 -// CHECK: affine.delinearize_index %[[IV]] into (%c128, %c16) +// CHECK: affine.delinearize_index %[[IV]] into (128, 16) // CHECK-COUNT-32: amdgpu.mfma // CHECK: scf.yield // CHECK-COUNT-4: vector.transfer_write {{.+}} {in_bounds = [true, true]} : vector<4x1xf16>, memref<2x10x64x64xf16, #hal.descriptor_type> diff --git a/third_party/llvm-project b/third_party/llvm-project index 8323ca8956ae..ac39504813f8 160000 --- a/third_party/llvm-project +++ b/third_party/llvm-project @@ -1 +1 @@ -Subproject commit 8323ca8956aec45713231e06768a0b330f83cce1 +Subproject commit ac39504813f8c52f10c0e364485569bff5a5f7a1
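
Note on the migrations above (explanatory, not part of the patch): llvm::Type::getPointerTo was deprecated upstream, so each use in LibraryBuilder.cpp is rewritten as llvm::PointerType::get with an explicit address space, and the MLIR test updates follow from affine.delinearize_index now accepting static basis elements directly, e.g. "into (4, 16)" rather than "into (%c4, %c16)", which removes the surrounding arith.constant ops. The snippet below is a minimal sketch of the C++ pattern only; makeExampleTableType and example_table_t are illustrative names, not code from this change.

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"

  // Illustrative only: shows the getPointerTo -> PointerType::get rewrite.
  static llvm::StructType *makeExampleTableType(llvm::LLVMContext &context) {
    auto *i32Type = llvm::IntegerType::getInt32Ty(context);
    auto *i8PtrType = llvm::PointerType::getUnqual(context);
    // Before (deprecated): i8PtrType->getPointerTo()
    // After: name the pointee type and pass the address space explicitly.
    auto *i8PtrPtrType = llvm::PointerType::get(i8PtrType, /*AddressSpace=*/0);
    return llvm::StructType::create(context,
                                    {i32Type, i8PtrPtrType},
                                    "example_table_t",
                                    /*isPacked=*/false);
  }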