@@ -496,6 +496,62 @@ func.func @matmul_lowering_f32f32f32_gfx942() attributes {
 
 // -----
 
+// This tests that the padding resolver can handle partial loads/stores. The
+// offsets, sizes, and strides are arbitrarily chosen in the test.
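+// Note: #pad_encoding pads the inner dimension by 64 elements, so the CHECK
+// lines below expect the destination binding to materialize as
+// tensor<2048x2112xf16> (2048 + 64 = 2112).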
+
+#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb",
+  {
+    abi = "hip",
+    iree.encoding.resolver = #iree_gpu.gpu_padding_resolver<>,
+    iree_codegen.target_info = #iree_gpu.target<arch = "gfx942",
+                                       features = "",
+                                       wgp = <compute = fp32,
+                                              storage = b32,
+                                              subgroup = none,
+                                              mma = [<MFMA_F32_16x16x4_F32>],
+                                              subgroup_size_choices = [64],
+                                              max_workgroup_sizes = [1024, 1024, 1024],
+                                              max_thread_count_per_workgroup = 1024,
+                                              max_workgroup_memory_bytes = 65536,
+                                              max_workgroup_counts = [2147483647, 2147483647, 2147483647],
+                                              max_load_instruction_bits = 128,
+                                              simds_per_wgp = 4,
+                                              vgpr_space_bits = 16384>>
+  }>
+
+#binding_ro = #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">
+#binding = #hal.pipeline.binding<storage_buffer, Indirect>
+#encoding_mmt = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f16, f16, f16]>
+#pad_encoding = #iree_encoding.layout<[#iree_encoding.padding<[0, 64]>]>
+func.func @set_pad_encoding_and_partial_load_store() attributes {
+  hal.executable.target = #executable_target_rocm_hsaco_fb
+} {
+  %c0 = arith.constant 0 : index
+  %0 = hal.interface.constant.load layout(<constants = 1, bindings = [#binding_ro, #binding], flags = Indirect>) ordinal(0) : i32
+  %1 = arith.index_castui %0 : i32 to index
+  %3 = hal.interface.binding.subspan layout(<constants = 1, bindings = [#binding_ro, #binding], flags = Indirect>) binding(0) alignment(64) offset(%1) flags("ReadOnly|Indirect")
+    : !iree_tensor_ext.dispatch.tensor<readonly:tensor<2048x2048xf16>>
+  %4 = hal.interface.binding.subspan layout(<constants = 1, bindings = [#binding_ro, #binding], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags(Indirect)
+    : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<2048x2048xf16, #pad_encoding>>
+  %5 = iree_tensor_ext.dispatch.tensor.load %3, offsets = [0, 0], sizes = [1024, 1024], strides = [2, 2]
+    : !iree_tensor_ext.dispatch.tensor<readonly:tensor<2048x2048xf16>> -> tensor<1024x1024xf16>
+  %6 = iree_encoding.set_encoding %5 : tensor<1024x1024xf16> -> tensor<1024x1024xf16, #encoding_mmt>
+  iree_tensor_ext.dispatch.tensor.store %6, %4, offsets = [0, 0], sizes = [1024, 1024], strides = [2, 2]
+    : tensor<1024x1024xf16, #encoding_mmt> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<2048x2048xf16, #pad_encoding>>
+  return
+}
+// CHECK-LABEL: @set_pad_encoding_and_partial_load_store
+// CHECK:         %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-SAME:                  !iree_tensor_ext.dispatch.tensor<readonly:tensor<2048x2048xf16>>
+// CHECK:         %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-SAME:                  !iree_tensor_ext.dispatch.tensor<writeonly:tensor<2048x2112xf16>>
+// CHECK:         %[[LD:.+]] = iree_tensor_ext.dispatch.tensor.load %[[A]], offsets = [0, 0], sizes = [1024, 1024], strides = [2, 2]
+// CHECK-SAME:                  !iree_tensor_ext.dispatch.tensor<readonly:tensor<2048x2048xf16>> -> tensor<1024x1024xf16>
+// CHECK:         iree_tensor_ext.dispatch.tensor.store %[[LD]], %[[B]], offsets = [0, 0], sizes = [1024, 1024], strides = [2, 2]
+// CHECK-SAME:                  tensor<1024x1024xf16> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<2048x2112xf16>>
+
+// -----
+
 //----------------------------------------------------------------------------//
 // Test suite for encodings with resolved layouts.
 // All the implementations use interfaces, so we only check with CPU encoding