From 92526ea886a160e5b6c21554621769d78f0a4fdb Mon Sep 17 00:00:00 2001 From: Quinn Dawkins Date: Thu, 7 Nov 2024 18:32:50 -0500 Subject: [PATCH] Integrate llvm-project@b358f21 (#19066) - llvm::Type->getPointerTo was deprecated and replaced with llvm::PointerType::get - affine.delinearize_index now takes mixed attr/value operands Includes two fixes - SCF::TileAndFuseConsumerOfSlice was incorrectly determining all DPS ops to be tilable - A new PackOp + CastOp folder was dropping lowering configs --- .../plugins/target/LLVMCPU/LibraryBuilder.cpp | 139 ++++++++++-------- .../GPU/test/gpu_distribute_forall.mlir | 17 +-- .../GPU/test/vector_reduction_to_gpu.mlir | 2 +- ...optimize_tensor_insert_extract_slices.mlir | 2 +- .../test/reconcile_translation_info.mlir | 5 +- .../Common/test/transform_flatten_forall.mlir | 2 +- .../test/distribute_lane_forall.mlir | 6 +- .../test/transform_fuse_forall.mlir | 6 +- .../test/distribute_mma_to_lanes.mlir | 15 +- .../test/ROCDL/pipeline_tile_and_fuse.mlir | 2 +- .../pipeline_vector_distribute_gfx940.mlir | 2 +- third_party/llvm-project | 2 +- 12 files changed, 97 insertions(+), 103 deletions(-) diff --git a/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp b/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp index a4402ed6db0b..d0e8786c274b 100644 --- a/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp +++ b/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp @@ -6,6 +6,7 @@ #include "compiler/plugins/target/LLVMCPU/LibraryBuilder.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" // ============================================================================= @@ -42,13 +43,14 @@ static llvm::StructType *makeImportTableType(llvm::LLVMContext &context) { } auto *i32Type = llvm::IntegerType::getInt32Ty(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); - auto *type = llvm::StructType::create(context, - { - i32Type, - i8PtrType->getPointerTo(), - }, - "iree_hal_executable_import_table_v0_t", - /*isPacked=*/false); + auto *type = + llvm::StructType::create(context, + { + i32Type, + llvm::PointerType::get(i8PtrType, 0), + }, + "iree_hal_executable_import_table_v0_t", + /*isPacked=*/false); return type; } @@ -100,13 +102,14 @@ makeDispatchFunctionType(llvm::LLVMContext &context) { auto *dispatchStateType = makeDispatchStateType(context); auto *workgroupStateType = makeWorkgroupStateType(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); - return llvm::FunctionType::get(i32Type, - { - environmentType->getPointerTo(), - dispatchStateType->getPointerTo(), - workgroupStateType->getPointerTo(), - }, - /*isVarArg=*/false); + return llvm::FunctionType::get( + i32Type, + { + llvm::PointerType::get(environmentType, 0), + llvm::PointerType::get(dispatchStateType, 0), + llvm::PointerType::get(workgroupStateType, 0), + }, + /*isVarArg=*/false); } // %struct.iree_hal_executable_dispatch_attrs_v0_t = type { @@ -181,15 +184,15 @@ makeStageLocationTableType(llvm::LLVMContext &context) { auto *i32Type = llvm::IntegerType::getInt32Ty(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); auto *sourceLocationType = makeSourceLocationType(context); - auto *type = - llvm::StructType::create(context, - { - i32Type, - i8PtrType->getPointerTo(), - sourceLocationType->getPointerTo(), - }, - "iree_hal_executable_stage_location_table_v0_t", - /*isPacked=*/false); + auto *type = llvm::StructType::create( + context, + { + i32Type, + llvm::PointerType::get(i8PtrType, 0), + llvm::PointerType::get(sourceLocationType, 0), + }, + 
"iree_hal_executable_stage_location_table_v0_t", + /*isPacked=*/false); return type; } @@ -209,6 +212,8 @@ static llvm::StructType *makeExportTableType(llvm::LLVMContext &context) { } auto *i32Type = llvm::IntegerType::getInt32Ty(context); auto *dispatchFunctionType = makeDispatchFunctionType(context); + auto *dispatchFunctionPointerType = + llvm::PointerType::get(dispatchFunctionType, 0); auto *dispatchAttrsType = makeDispatchAttrsType(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); auto *sourceLocationType = makeSourceLocationType(context); @@ -217,12 +222,12 @@ static llvm::StructType *makeExportTableType(llvm::LLVMContext &context) { context, { i32Type, - dispatchFunctionType->getPointerTo()->getPointerTo(), - dispatchAttrsType->getPointerTo(), - i8PtrType->getPointerTo(), - i8PtrType->getPointerTo(), - sourceLocationType->getPointerTo(), - stageLocationTableType->getPointerTo(), + llvm::PointerType::get(dispatchFunctionPointerType, 0), + llvm::PointerType::get(dispatchAttrsType, 0), + llvm::PointerType::get(i8PtrType, 0), + llvm::PointerType::get(i8PtrType, 0), + llvm::PointerType::get(sourceLocationType, 0), + llvm::PointerType::get(stageLocationTableType, 0), }, "iree_hal_executable_export_table_v0_t", /*isPacked=*/false); @@ -288,7 +293,7 @@ static llvm::StructType *makeSourceTableType(llvm::LLVMContext &context) { llvm::StructType::create(context, { i32Type, - sourceFileType->getPointerTo(), + llvm::PointerType::get(sourceFileType, 0), }, "iree_hal_executable_source_file_table_v0_t", /*isPacked=*/false); @@ -335,16 +340,17 @@ static llvm::StructType *makeLibraryType(llvm::StructType *libraryHeaderType) { auto *exportTableType = makeExportTableType(context); auto *constantTableType = makeConstantTableType(context); auto *sourceTableType = makeSourceTableType(context); - auto *type = llvm::StructType::create(context, - { - libraryHeaderType->getPointerTo(), - importTableType, - exportTableType, - constantTableType, - sourceTableType, - }, - "iree_hal_executable_library_v0_t", - /*isPacked=*/false); + auto *type = + llvm::StructType::create(context, + { + llvm::PointerType::get(libraryHeaderType, 0), + importTableType, + exportTableType, + constantTableType, + sourceTableType, + }, + "iree_hal_executable_library_v0_t", + /*isPacked=*/false); return type; } @@ -379,7 +385,7 @@ static llvm::Constant *createStringConstantOrNull(StringRef value, llvm::Module *module) { if (value.empty()) { auto i8Type = llvm::IntegerType::getInt8Ty(module->getContext()); - return llvm::ConstantPointerNull::get(i8Type->getPointerTo()); + return llvm::ConstantPointerNull::get(llvm::PointerType::get(i8Type, 0)); } return createStringConstant(value, module); } @@ -427,13 +433,14 @@ static llvm::Constant *createArrayConstant(StringRef name, llvm::Function *LibraryBuilder::build(StringRef queryFuncName) { auto &context = module->getContext(); auto *i32Type = llvm::IntegerType::getInt32Ty(context); - auto *environmentType = makeEnvironmentType(context)->getPointerTo(); + auto *environmentStructType = makeEnvironmentType(context); + auto *environmentType = llvm::PointerType::get(environmentStructType, 0); auto *libraryHeaderType = makeLibraryHeaderType(context); // %struct.iree_hal_executable_library_header_t** // @iree_hal_library_query(i32, %struct.iree_hal_executable_environment_v0_t*) auto *queryFuncType = - llvm::FunctionType::get(libraryHeaderType->getPointerTo(), + llvm::FunctionType::get(llvm::PointerType::get(libraryHeaderType, 0), { i32Type, environmentType, @@ -454,8 +461,10 @@ 
llvm::Function *LibraryBuilder::build(StringRef queryFuncName) { builder.CreateICmpEQ(func->getArg(0), llvm::ConstantInt::get( i32Type, static_cast(Version::LATEST))), - builder.CreatePointerCast(v0, libraryHeaderType->getPointerTo()), - llvm::ConstantPointerNull::get(libraryHeaderType->getPointerTo()))); + builder.CreatePointerCast(v0, + llvm::PointerType::get(libraryHeaderType, 0)), + llvm::ConstantPointerNull::get( + llvm::PointerType::get(libraryHeaderType, 0)))); return func; } @@ -467,7 +476,7 @@ LibraryBuilder::buildLibraryV0ImportTable(std::string libraryName) { auto *i8Type = llvm::IntegerType::getInt8Ty(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); llvm::Constant *symbolNames = - llvm::Constant::getNullValue(i8Type->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i8Type, 0)); if (!imports.empty()) { SmallVector symbolNameValues; for (auto &import : imports) { @@ -476,9 +485,9 @@ LibraryBuilder::buildLibraryV0ImportTable(std::string libraryName) { symbolName = "?" + symbolName; symbolNameValues.push_back(createStringConstant(symbolName, module)); } - symbolNames = - createArrayConstant(libraryName + "_import_names", - i8Type->getPointerTo(), symbolNameValues, module); + symbolNames = createArrayConstant(libraryName + "_import_names", + llvm::PointerType::get(i8Type, 0), + symbolNameValues, module); } return llvm::ConstantStruct::get( importTableType, { @@ -507,12 +516,12 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { for (auto dispatch : exports) exportPtrValues.push_back(dispatch.func); llvm::Constant *exportPtrs = createArrayConstant( - libraryName + "_funcs", dispatchFunctionType->getPointerTo(), + libraryName + "_funcs", llvm::PointerType::get(dispatchFunctionType, 0), exportPtrValues, module); // iree_hal_executable_export_table_v0_t::attrs llvm::Constant *exportAttrs = - llvm::Constant::getNullValue(i32Type->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i32Type, 0)); bool hasNonDefaultAttrs = llvm::any_of(exports, [](const auto &dispatch) { return !dispatch.attrs.isDefault(); }); @@ -557,19 +566,20 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { // iree_hal_executable_export_table_v0_t::names llvm::Constant *exportNames = - llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i8Type, 0)); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { SmallVector exportNameValues; for (auto dispatch : exports) exportNameValues.push_back(createStringConstant(dispatch.name, module)); - exportNames = - createArrayConstant(libraryName + "_names", i8Type->getPointerTo(), - exportNameValues, module); + exportNames = createArrayConstant(libraryName + "_names", + llvm::PointerType::get(i8Type, 0), + exportNameValues, module); } // iree_hal_executable_export_table_v0_t::tags + auto *i8PtrType = llvm::PointerType::get(i8Type, 0); llvm::Constant *exportTags = - llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(i8PtrType, 0)); bool hasAnyTags = llvm::any_of( exports, [](auto &dispatch) { return !dispatch.tag.empty(); }); if (mode == Mode::INCLUDE_REFLECTION_ATTRS && hasAnyTags) { @@ -577,13 +587,14 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { for (auto dispatch : exports) exportTagValues.push_back( createStringConstantOrNull(dispatch.tag, module)); - exportTags = createArrayConstant( - libraryName + "_tags", 
i8Type->getPointerTo(), exportTagValues, module); + exportTags = createArrayConstant(libraryName + "_tags", + llvm::PointerType::get(i8Type, 0), + exportTagValues, module); } // iree_hal_executable_export_table_v0_t::source_locations - llvm::Constant *exportSourceLocations = - llvm::Constant::getNullValue(sourceLocationType->getPointerTo()); + llvm::Constant *exportSourceLocations = llvm::Constant::getNullValue( + llvm::PointerType::get(sourceLocationType, 0)); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { SmallVector exportSourceLocationValues; for (auto dispatch : exports) { @@ -605,8 +616,8 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { } // iree_hal_executable_export_table_v0_t::stage_locations - llvm::Constant *exportStageLocations = - llvm::Constant::getNullValue(stageLocationTableType->getPointerTo()); + llvm::Constant *exportStageLocations = llvm::Constant::getNullValue( + llvm::PointerType::get(stageLocationTableType, 0)); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { SmallVector exportStageTableValues; for (auto dispatch : exports) { @@ -628,7 +639,7 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { } llvm::Constant *stageNamesPtr = createArrayConstant( libraryName + "_" + dispatch.name + "_stage_names", - i8Type->getPointerTo(), exportStageNameValues, module); + llvm::PointerType::get(i8Type, 0), exportStageNameValues, module); llvm::Constant *sourceLocationsPtr = createArrayConstant( libraryName + "_" + dispatch.name + "_stage_source_locations", sourceLocationType, exportSourceLocationValues, module); @@ -688,7 +699,7 @@ LibraryBuilder::buildLibraryV0SourceTable(std::string libraryName) { auto *sourceTableType = makeSourceTableType(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); llvm::Constant *sourceFilesValue = - llvm::Constant::getNullValue(sourceFileType->getPointerTo()); + llvm::Constant::getNullValue(llvm::PointerType::get(sourceFileType, 0)); if (!sourceFiles.empty()) { SmallVector sourceFileValues; for (auto &sourceFile : sourceFiles) { diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir index fab5da8f4595..72cf235b2b08 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir @@ -20,8 +20,7 @@ func.func @distribute_thread_forall(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<(d0)[s0, s1, s2] -> (d0 + s0 + s1 * 64 + s2 * 128)>(%[[I]]) // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = affine.delinearize_index %[[LINID]] into (%c1024) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] // ----- @@ -44,8 +43,7 @@ func.func @distribute_warp_forall(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<(d0)[s0, s1, s2] -> (d0 + s1 * 2 + s2 * 4 + s0 floordiv 32)>(%[[I]]) // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = affine.delinearize_index %[[LINID]] into (%c32) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] // ----- @@ -85,8 +83,7 @@ func.func @distribute_thread_forall_drop_for_loop(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<()[s0, s1, s2] -> (s0 + s1 * 64 + s2 * 128)> // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = 
affine.delinearize_index %[[LINID]] into (%c128) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] // ----- @@ -102,7 +99,6 @@ func.func @distribute_thread_forall_single_thread(%out : memref) } // CHECK-LABEL: func @distribute_thread_forall_single_thread -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[TX:.+]] = gpu.thread_id x // CHECK-DAG: %[[TY:.+]] = gpu.thread_id y // CHECK-DAG: %[[TZ:.+]] = gpu.thread_id z @@ -110,7 +106,7 @@ func.func @distribute_thread_forall_single_thread(%out : memref) // CHECK-SAME: affine_map<()[s0, s1, s2] -> (s0 + s1 * 64 + s2 * 128)> // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] // CHECK: scf.for %[[I:.+]] = %[[LINID]] to %c1 step %c128 { -// CHECK: memref.store {{.*}}[%[[C0]]] +// CHECK: memref.store {{.*}}[%[[I]]] // ----- @@ -133,7 +129,7 @@ func.func @distribute_thread_forall_multi_dim(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<(d0)[s0, s1, s2] -> (d0 + s0 + s1 * 64 + s2 * 128)>(%[[I]]) // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]]:3 = affine.delinearize_index %[[LINID]] into (%c16, %c8, %c4) : index +// CHECK: %[[DELIN:.+]]:3 = affine.delinearize_index %[[LINID]] into (16, 8, 4) : index // CHECK: memref.store {{.*}}[%[[DELIN]]#0, %[[DELIN]]#1, %[[DELIN]]#2] @@ -157,5 +153,4 @@ func.func @distribute_thread_forall_small_workgroup(%out : memref) // CHECK: %[[LINID:.+]] = affine.apply // CHECK-SAME: affine_map<()[s0, s1, s2] -> (s0 + s1 * 7 + s2 * 7)> // CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[DELIN:.+]] = affine.delinearize_index %[[LINID]] into (%c7) : index -// CHECK: memref.store {{.*}}[%[[DELIN]]] +// CHECK: memref.store {{.*}}[%[[LINID]]] diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir index e365a2b9569c..8841b7fa73d3 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir @@ -302,6 +302,6 @@ module { // CHECK-LABEL: func @simple_nd_write( // CHECK: %[[RD:.+]] = vector.transfer_read {{.*}} vector<1x128xf32> -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %{{.*}} into (%c4, %c8) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %{{.*}} into (4, 8) : index, index // CHECK: %[[INNER_ID:.+]] = affine.apply #[[$MAP]]()[%[[IDS]]#1] // CHECK: vector.transfer_write %[[RD]], %{{.*}}[%[[IDS]]#0, %[[INNER_ID]]] {{.*}} : vector<1x128xf32> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir index dabd2854a8a8..fd89230e161c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir @@ -200,7 +200,7 @@ func.func @batch_matmul_with_padding_strategy(%arg0: tensor<1x?x1280xf16>, %arg1 %4 = tensor.empty() : tensor<1x64x128xf16> %5 = vector.transfer_write %cst, %4[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x128xf16>, tensor<1x64x128xf16> %6 = scf.for %arg2 = %c0 to %c20 step %c1 iter_args(%arg3 = %5) -> (tensor<1x64x128xf16>) { - %7 = affine.delinearize_index %arg2 into (%c20) : index + %7 = affine.delinearize_index %arg2 into (20) : index %8 = affine.apply #map()[%7] 
%extracted_slice_1 = tensor.extract_slice %arg1[0, %8, 0] [1, 64, 128] [1, 1, 1] : tensor<1x1280x128xf16> to tensor<1x64x128xf16> %extracted_slice_2 = tensor.extract_slice %arg0[0, 0, %8] [1, %3, 64] [1, 1, 1] : tensor<1x?x1280xf16> to tensor<1x?x64xf16> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir index c7e95db23cd2..f00d249574f9 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir @@ -377,14 +377,11 @@ hal.executable private @scf_forall_4D_static_interchange { // CHECK-DAG: %[[C160:.+]] = arith.constant 160 : index // CHECK: hal.return %[[C6]], %[[C7]], %[[C160]] // CHECK: func @scf_forall_4D_static_interchange() -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index -// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index // CHECK-DAG: %[[WG_ID_X:.+]] = hal.interface.workgroup.id[0] // CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1] // CHECK-DAG: %[[WG_ID_Z:.+]] = hal.interface.workgroup.id[2] // CHECK-NOT: scf.forall -// CHECK: %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[WG_ID_Z]] into (%[[C5]], %[[C8]], %[[C4]]) +// CHECK: %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[WG_ID_Z]] into (5, 8, 4) // CHECK: %[[I:.+]] = affine.apply #[[MAP0]]()[%[[DELINEARIZE]]#0] // CHECK: %[[J:.+]] = affine.apply #[[MAP1]]()[%[[WG_ID_X]]] // CHECK: %[[K:.+]] = affine.apply #[[MAP2]]()[%[[WG_ID_Y]]] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir index cd6ab2bb90ba..c4a21b0073e0 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/transform_flatten_forall.mlir @@ -32,7 +32,7 @@ module attributes { transform.with_named_sequence } { // CHECK-LABEL: func @flatten_forall_thread_mapping // CHECK: scf.forall (%[[FLAT_ID:.+]]) in (64) -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (%c4, %c16) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (4, 16) : index, index // CHECK-DAG: %[[IDX:.+]] = affine.apply #[[$MAP]](%[[IDS]]#0) // CHECK-DAG: %[[IDY:.+]] = affine.apply #[[$MAP1]](%[[IDS]]#1) // CHECK: } {mapping = [#gpu.thread]} diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir index ecd704c77525..5e3ef323f7e0 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir @@ -4,10 +4,8 @@ #map1 = affine_map<(d0) -> (d0 * 16)> module { func.func @distribute_lane_forall(%arg0: memref<128x128xf32>, %dest: memref<128x128xf32>) { - %c4 = arith.constant 4 : index - %c16 = arith.constant 16 : index scf.forall (%id) in (64) { - %ids:2 = affine.delinearize_index %id into (%c4, %c16) : index, index + %ids:2 = affine.delinearize_index %id into (4, 16) : index, index %3 = affine.apply #map(%ids#0) %4 = affine.apply #map1(%ids#1) %in_view = memref.subview %arg0[%3, %4] [32, 8] [1, 1] : memref<128x128xf32> to memref<32x8xf32, 
strided<[128, 1], offset: ?>> @@ -32,5 +30,5 @@ module attributes { transform.with_named_sequence } { // CHECK-LABEL: func @distribute_lane_forall // CHECK: %[[LANE_ID:.+]] = gpu.lane_id // CHECK-NOT: scf.forall -// CHECK: affine.delinearize_index %[[LANE_ID]] into (%c4, %c16) : index, index +// CHECK: affine.delinearize_index %[[LANE_ID]] into (4, 16) : index, index // CHECK: linalg.copy diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir index 780e102ec73a..e317674d6c2e 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/transform_fuse_forall.mlir @@ -52,7 +52,7 @@ module attributes { transform.with_named_sequence } { // CHECK: ^bb0(%[[INTERMEDIATE:.+]]: tensor<128x128xf32>): // CHECK: %[[LOOP:.+]] = scf.for %[[I:.+]] = %c0 to %c64{{.*}} step %c64{{.*}} iter_args(%[[ITER:.+]] = %[[INTERMEDIATE]]) -> (tensor<128x128xf32>) // CHECK: %[[LINEARID:.+]] = affine.apply #[[$MAP2]](%[[I]], %[[IDX]], %[[IDY]]) -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINEARID]] into (%c64, %c1) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINEARID]] into (64, 1) : index, index // CHECK: %[[INID0:.+]] = affine.apply #[[$MAP3]](%[[IDS]]#0) // CHECK: %[[INSLICE0:.+]] = tensor.extract_slice %[[ARG0]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1] : tensor<128x128xf32> to tensor<2x128xf32> // CHECK: %[[INSLICE1:.+]] = tensor.extract_slice %[[ITER]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1] : tensor<128x128xf32> to tensor<2x128xf32> @@ -251,7 +251,7 @@ module attributes { transform.with_named_sequence } { // CHECK: %[[BARRIER:.+]] = iree_gpu.barrier_region ins(%[[ALLOC]] : tensor<128x128xf32>) // CHECK: %[[LOOP:.+]] = scf.for %[[I:.+]] = %c0 to %c64{{.*}} step %c64{{.*}} iter_args(%[[ITER:.+]] = %{{.*}}) -> (tensor<128x128xf32>) // CHECK: %[[FLAT_ID:.+]] = affine.apply #[[$MAP4]](%[[I]], %[[L_IDY]], %[[L_IDX]], %[[W_IDX]], %[[W_IDY]]) -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (%c64, %c1) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[FLAT_ID]] into (64, 1) : index, index // CHECK: %[[IDX:.+]] = affine.apply #[[$MAP5]](%[[IDS]]#0) // CHECK: %[[COPY:.+]] = linalg.copy // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[COPY]] into %[[ITER]][%[[IDX]], %[[IDS]]#1] [2, 128] @@ -311,7 +311,7 @@ module attributes { transform.with_named_sequence } { // CHECK: iree_gpu.barrier_region ins(%[[ALLOC]] // CHECK: %[[LINEARID:.+]] = affine.apply #[[$MAP1]](%[[IDX]], %[[IDY]]) // CHECK: scf.for %[[I:.+]] = %[[LINEARID]] to %c32{{.*}} step %c64{{.*}} -// CHECK: %[[IDS:.+]] = affine.delinearize_index %[[I]] into (%c32) : index +// CHECK: %[[IDS:.+]] = affine.delinearize_index %[[I]] into (32) : index // CHECK: scf.yield // CHECK: unroll_loop // CHECK: } {mapping = [#gpu.thread, #gpu.thread]} diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir index 957ca10ffd80..4a949b671442 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir @@ -393,11 +393,9 @@ func.func 
@data_tiled_1x1x1_tensor_multi_mma(%lhs: tensor<1x1x4x16xf32>, %rhs: t // CHECK-SAME: %[[LHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[RHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[ACC:[A-Za-z0-9]+]] -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index // CHECK: scf.forall (%[[THREAD_ID:.+]]) in (64) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x4x16x4xf32>) // CHECK: %[[ID_CLAMPED:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]]) -// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C4]], %[[C16]]) +// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (4, 16) // CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]][0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1] [1, 1, 1, 1] [1, 1, 1, 1] // CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]][0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1] [1, 1, 1, 1] [1, 1, 1, 1] // CHECK-DAG: %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC_ARG]] @@ -432,11 +430,9 @@ func.func @data_tiled_2x2x4_tensor_multi_mma_unrolled(%lhs: tensor<1x1x2x4x16x4x // CHECK-SAME: %[[LHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[RHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[ACC:[A-Za-z0-9]+]] -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index // CHECK: scf.forall (%[[THREAD_ID:.+]]) in (64) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x2x2x4x16x4xf32>) // CHECK: %[[ID_CLAMPED:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]]) -// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C4]], %[[C16]]) +// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (4, 16) // CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]] // CHECK-SAME: [0, 0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1] // CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]] @@ -474,18 +470,15 @@ func.func @data_tiled_2x2x4_tensor_multi_mma_unrolled_to_subgroups(%lhs: tensor< // CHECK-SAME: %[[LHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[RHS:[A-Za-z0-9]+]] // CHECK-SAME: %[[ACC:[A-Za-z0-9]+]] -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index // CHECK: scf.forall (%[[THREAD_ID:.+]]) in (256) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x2x2x4x16x4xf32>) // CHECK: %[[ID_CLAMPED_128:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]]) -// CHECK-DAG: %[[IN_IDS:.+]]:3 = affine.delinearize_index %[[ID_CLAMPED_128]] into (%[[C2]], %[[C4]], %[[C16]]) +// CHECK-DAG: %[[IN_IDS:.+]]:3 = affine.delinearize_index %[[ID_CLAMPED_128]] into (2, 4, 16) // CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]] // CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, 0] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1] // CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]] // CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, 0] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1] // CHECK: %[[ID_CLAMPED_256:.+]] = affine.apply #[[$MAP1]](%[[THREAD_ID]]) -// CHECK-DAG: %[[ACC_IDS:.+]]:4 = affine.delinearize_index %[[ID_CLAMPED_256]] into (%[[C2]], %[[C2]], %[[C4]], %[[C16]]) +// CHECK-DAG: %[[ACC_IDS:.+]]:4 = affine.delinearize_index %[[ID_CLAMPED_256]] into (2, 2, 4, 16) // CHECK-DAG: %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC_ARG]] // CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, 0] [1, 1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1] // CHECK: %[[MMA:.+]] = 
iree_gpu.multi_mma %[[LHS_SLICE]], %[[RHS_SLICE]], %[[ACC_SLICE]] diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir index e1805bbcd04c..e2d61a3cea4a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir @@ -553,7 +553,7 @@ hal.executable public @main { // CHECK-DAG: %[[IDY:.+]] = gpu.thread_id y // CHECK-DAG: %[[IDZ:.+]] = gpu.thread_id z // CHECK: %[[LINID0:.+]] = affine.apply #[[$MAP]]()[%[[IDX]], %[[IDY]], %[[IDZ]]] -// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINID0:.+]] into (%c4, %c8) : index, index +// CHECK: %[[IDS:.+]]:2 = affine.delinearize_index %[[LINID0:.+]] into (4, 8) : index, index // CHECK: %[[LINID1:.+]] = affine.apply #[[$MAP1]]()[%[[IDS]]#0, %[[IDS]]#1] // CHECK: scf.forall ({{.*}}) in (32, 98) { // CHECK: scf.for %{{.*}} = %c0 to %c256 step %c4 {{.*}} -> (vector<1x4xf32>) diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir index 960fe0b9938c..c4de8342c9f1 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx940.mlir @@ -208,7 +208,7 @@ hal.executable @matmul_multiple_k { // CHECK-LABEL: func.func @matmul_multiple_k // CHECK: scf.for %[[IV:.+]] = %c0 to %c2048 step %c1 -// CHECK: affine.delinearize_index %[[IV]] into (%c128, %c16) +// CHECK: affine.delinearize_index %[[IV]] into (128, 16) // CHECK-COUNT-32: amdgpu.mfma // CHECK: scf.yield // CHECK-COUNT-4: vector.transfer_write {{.+}} {in_bounds = [true, true]} : vector<4x1xf16>, memref<2x10x64x64xf16, #hal.descriptor_type> diff --git a/third_party/llvm-project b/third_party/llvm-project index 8323ca8956ae..ac39504813f8 160000 --- a/third_party/llvm-project +++ b/third_party/llvm-project @@ -1 +1 @@ -Subproject commit 8323ca8956aec45713231e06768a0b330f83cce1 +Subproject commit ac39504813f8c52f10c0e364485569bff5a5f7a1
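
Note on the migrations above (explanatory, not part of the patch): llvm::Type::getPointerTo was deprecated upstream, so each use in LibraryBuilder.cpp is rewritten as llvm::PointerType::get with an explicit address space, and the MLIR test updates follow from affine.delinearize_index now accepting static basis elements directly, e.g. "into (4, 16)" rather than "into (%c4, %c16)", which removes the surrounding arith.constant ops. The snippet below is a minimal sketch of the C++ pattern only; makeExampleTableType and example_table_t are illustrative names, not code from this change.

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"

  // Illustrative only: shows the getPointerTo -> PointerType::get rewrite.
  static llvm::StructType *makeExampleTableType(llvm::LLVMContext &context) {
    auto *i32Type = llvm::IntegerType::getInt32Ty(context);
    auto *i8PtrType = llvm::PointerType::getUnqual(context);
    // Before (deprecated): i8PtrType->getPointerTo()
    // After: name the pointee type and pass the address space explicitly.
    auto *i8PtrPtrType = llvm::PointerType::get(i8PtrType, /*AddressSpace=*/0);
    return llvm::StructType::create(context,
                                    {i32Type, i8PtrPtrType},
                                    "example_table_t",
                                    /*isPacked=*/false);
  }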