Skip to content

Commit ff23ee4

Browse files
authored
[AMDGPU] Add support for v_cvt_pk_f16_fp8 on gfx1250 (#145747)
Co-authored-by: Shilei Tian <email address redacted>
1 parent 6fd182a commit ff23ee4

25 files changed

+280
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,5 +642,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16
642642

643643
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
644644

645+
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
646+
645647
#undef BUILTIN
646648
#undef TARGET_BUILTIN

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,23 @@
1010
void test_setprio_inc_wg() {
1111
__builtin_amdgcn_s_setprio_inc_wg(10);
1212
}
13+
14+
// CHECK-LABEL: @test_cvt_pk_f16_fp8(
15+
// CHECK-NEXT: entry:
16+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
17+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
18+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
19+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
20+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
21+
// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
22+
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2
23+
// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 [[TMP0]])
24+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
25+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0
26+
// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 4
27+
// CHECK-NEXT: ret void
28+
//
29+
void test_cvt_pk_f16_fp8(global half2* out, short a)
30+
{
31+
out[0] = __builtin_amdgcn_cvt_pk_f16_fp8(a);
32+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,10 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
588588
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
589589
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
590590

591+
def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
592+
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
593+
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
594+
591595
class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
592596
[DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
593597
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4541,6 +4541,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
45414541
case Intrinsic::amdgcn_cvt_pknorm_u16:
45424542
case Intrinsic::amdgcn_cvt_pk_i16:
45434543
case Intrinsic::amdgcn_cvt_pk_u16:
4544+
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
45444545
case Intrinsic::amdgcn_fmed3:
45454546
case Intrinsic::amdgcn_cubeid:
45464547
case Intrinsic::amdgcn_cubema:

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,21 @@ let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts]
729729
}
730730
}
731731

732+
// FIXME-TRUE16: True16 versions of these instructions are untested.
733+
let HasExtSDWA = 0, HasOpSel = 1, EmitDstSel = 0, HasOMod = 0, HasModifiers = 1 in {
734+
def VOPProfile_CVT_PK_F16_F8 : VOPProfile<[v2f16, i16, untyped, untyped]>;
735+
def VOPProfile_CVT_PK_F16_F8_true16 : VOP3_Profile_True16<VOPProfile_CVT_PK_F16_F8>;
736+
def VOPProfile_CVT_PK_F16_F8_fake16 : VOP3_Profile_Fake16<VOPProfile_CVT_PK_F16_F8>;
737+
}
738+
739+
let SubtargetPredicate = isGFX1250Plus in {
740+
let mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
741+
defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8",
742+
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
743+
int_amdgcn_cvt_pk_f16_fp8>;
744+
}
745+
} // End SubtargetPredicate = isGFX1250Plus
746+
732747
let SubtargetPredicate = isGFX10Plus in {
733748
defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;
734749

@@ -1062,6 +1077,7 @@ defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
10621077
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
10631078

10641079
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
1080+
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
10651081

10661082
//===----------------------------------------------------------------------===//
10671083
// GFX10.
(new file; path missing from this capture — llc test for @llvm.amdgcn.cvt.pk.f16.fp8)

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s
3+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s
4+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s
5+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s
6+
7+
define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) {
8+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v:
9+
; GFX1250-SDAG-REAL16: ; %bb.0:
10+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l
11+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
12+
;
13+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v:
14+
; GFX1250-SDAG-FAKE16: ; %bb.0:
15+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
16+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
17+
;
18+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v:
19+
; GFX1250-GISEL-REAL16: ; %bb.0:
20+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l
21+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
22+
;
23+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v:
24+
; GFX1250-GISEL-FAKE16: ; %bb.0:
25+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
26+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
27+
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a)
28+
%ret = bitcast <2 x half> %cvt to float
29+
ret float %ret
30+
}
31+
32+
define amdgpu_ps float @test_cvt_pk_f16_fp8_s(i16 inreg %a) {
33+
; GFX1250-LABEL: test_cvt_pk_f16_fp8_s:
34+
; GFX1250: ; %bb.0:
35+
; GFX1250-NEXT: v_cvt_pk_f16_fp8 v0, s0
36+
; GFX1250-NEXT: ; return to shader part epilog
37+
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a)
38+
%ret = bitcast <2 x half> %cvt to float
39+
ret float %ret
40+
}
41+
42+
define amdgpu_ps float @test_cvt_pk_f16_fp8_v_hi(<2 x i16> %a) {
43+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi:
44+
; GFX1250-SDAG-REAL16: ; %bb.0:
45+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.h
46+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
47+
;
48+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi:
49+
; GFX1250-SDAG-FAKE16: ; %bb.0:
50+
; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
51+
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
52+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
53+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
54+
;
55+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi:
56+
; GFX1250-GISEL-REAL16: ; %bb.0:
57+
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
58+
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
59+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l
60+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
61+
;
62+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi:
63+
; GFX1250-GISEL-FAKE16: ; %bb.0:
64+
; GFX1250-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
65+
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
66+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
67+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
68+
%a.1 = extractelement <2 x i16> %a, i32 1
69+
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a.1)
70+
%ret = bitcast <2 x half> %cvt to float
71+
ret float %ret
72+
}

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,12 @@ v_cvt_f32_bf16 v5, src_scc
4545

4646
v_cvt_f32_bf16 v127, 0x8000
4747
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
48+
49+
v_cvt_pk_f16_fp8 v1, v2
50+
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
51+
52+
v_cvt_pk_f16_fp8 v1, s2
53+
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e]
54+
55+
v_cvt_pk_f16_fp8 v1, 100
56+
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,12 @@ v_cvt_f32_bf16 v127, 0x8000
4848

4949
v_cvt_f32_bf16 v5, v1.h
5050
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
51+
52+
v_cvt_pk_f16_fp8 v1, v2
53+
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
54+
55+
v_cvt_pk_f16_fp8 v1, s2
56+
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e]
57+
58+
v_cvt_pk_f16_fp8 v1, 100
59+
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,7 @@ v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
5757
v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
5858
// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
5959
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
60+
61+
v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
62+
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
63+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,11 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
6161
v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
6262
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
6363
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
64+
65+
v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
66+
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
69+
v_cvt_pk_f16_fp8 v1, v2.h quad_perm:[0,1,2,3]
70+
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff]
71+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,7 @@ v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
1313
v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
1414
// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
1515
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
16+
17+
v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
18+
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
19+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,11 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
1717
v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0]
1818
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
1919
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
20+
21+
v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
22+
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
23+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
24+
25+
v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
26+
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
27+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
22

3+
v_cvt_pk_f16_fp8 v1, v2 clamp
4+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5+
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 clamp
6+
// GFX1250-ERR-NEXT:{{^}}                        ^
7+
8+
v_cvt_pk_f16_fp8 v1, v2 mul:2
9+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
10+
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 mul:2
11+
// GFX1250-ERR-NEXT:{{^}}                        ^
12+
313
v_cvt_f32_bf16 v5, v1 clamp
414
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
515
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 clamp

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,15 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1]
7575

7676
v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
7777
// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00]
78+
79+
v_cvt_pk_f16_fp8 v1, v150
80+
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]
81+
82+
v_cvt_pk_f16_fp8 v1, v2 op_sel:[1]
83+
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00]
84+
85+
v_cvt_pk_f16_fp8 v1, v150 op_sel:[1]
86+
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00]
87+
88+
v_cvt_pk_f16_fp8 v1, s2 op_sel:[1]
89+
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00]

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,15 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
7878

7979
v_cvt_f32_bf16_e64 v5, v128.h
8080
// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00]
81+
82+
v_cvt_pk_f16_fp8 v1, v150
83+
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]
84+
85+
v_cvt_pk_f16_fp8 v1, v2 op_sel:[1]
86+
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00]
87+
88+
v_cvt_pk_f16_fp8 v1, v150 op_sel:[1]
89+
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00]
90+
91+
v_cvt_pk_f16_fp8 v1, s2 op_sel:[1]
92+
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00]

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15
4545
v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
4646
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
4747
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
48+
49+
v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
50+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
51+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
52+
53+
v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
54+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x04,0xff]
55+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
4949
v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0]
5050
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
5151
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
52+
53+
v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
54+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
55+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
56+
57+
v_cvt_pk_f16_fp8 v1, v128.h quad_perm:[0,1,2,3]
58+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff]
59+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,11 @@
55
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
66
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
77
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8+
9+
v_cvt_pk_f16_fp8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1
10+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
11+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
13+
v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0]
14+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
15+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
99
v_cvt_f32_bf16_e64_dpp v5, v128.h dpp8:[7,6,5,4,3,2,1,0]
1010
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
1111
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
13+
v_cvt_pk_f16_fp8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1
14+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
15+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
16+
17+
v_cvt_pk_f16_fp8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0]
18+
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
19+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,13 @@
4949

5050
0x81,0xe5,0x0a,0x7e
5151
# GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
52+
53+
0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00
54+
# GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
55+
56+
0x02,0xea,0x02,0x7e
57+
# GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e]
58+
59+
0x02,0xeb,0x02,0x7e
60+
# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e]
61+
# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,11 @@
4646

4747
0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff
4848
# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
49+
50+
0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff
51+
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
52+
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
53+
54+
0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff
55+
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff]
56+
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff]

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,11 @@
1313

1414
0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05
1515
# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
16+
17+
0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05
18+
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
19+
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
20+
21+
0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05
22+
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
23+
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]

0 commit comments

Comments
 (0)