Skip to content

Commit ce831a2

Browse files
authored
[AMDGPU][True16][MC] true16 for v_fma_f16 (#119477)
Support the true16 format for v_fma_f16 in MC. Because v_fma_f16 is replaced by v_fma_f16_t16/v_fma_f16_fake16 on GFX11 and later, the CodeGen patterns have to be updated to use v_fma_f16_fake16 so that the CodeGen tests keep passing. No pattern is modified or created; v_fma_f16 is simply replaced with its fake16 form.
1 parent 6f28b4b commit ce831a2

18 files changed

+1072
-320
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ static unsigned macToMad(unsigned Opc) {
199199
case AMDGPU::V_FMAC_F16_e64:
200200
return AMDGPU::V_FMA_F16_gfx9_e64;
201201
case AMDGPU::V_FMAC_F16_fake16_e64:
202-
return AMDGPU::V_FMA_F16_gfx9_e64;
202+
return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
203203
case AMDGPU::V_FMAC_LEGACY_F32_e64:
204204
return AMDGPU::V_FMA_LEGACY_F32_e64;
205205
case AMDGPU::V_FMAC_F64_e64:

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3805,6 +3805,36 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
38053805
}
38063806
}
38073807

3808+
/// Map a MAC/FMAC opcode \p Opc (either its _e32 or _e64 form) to the
/// corresponding MAD/FMA _e64 opcode. convertToThreeAddress uses the result
/// as the opcode of the replacement instruction.
///
/// \param ST  Subtarget; only queried to choose between the fake16 and the
///            plain form of V_FMA_F16_gfx9.
/// \param Opc Opcode to convert. Must be one of the MAC/FMAC opcodes listed
///            in the switch below; any other value reaches llvm_unreachable.
/// \returns the matching AMDGPU::V_MAD_* / AMDGPU::V_FMA_* _e64 opcode.
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
3809+
switch (Opc) {
3810+
case AMDGPU::V_MAC_F16_e32:
3811+
case AMDGPU::V_MAC_F16_e64:
3812+
return AMDGPU::V_MAD_F16_e64;
3813+
case AMDGPU::V_MAC_F32_e32:
3814+
case AMDGPU::V_MAC_F32_e64:
3815+
return AMDGPU::V_MAD_F32_e64;
3816+
case AMDGPU::V_MAC_LEGACY_F32_e32:
3817+
case AMDGPU::V_MAC_LEGACY_F32_e64:
3818+
return AMDGPU::V_MAD_LEGACY_F32_e64;
3819+
case AMDGPU::V_FMAC_LEGACY_F32_e32:
3820+
case AMDGPU::V_FMAC_LEGACY_F32_e64:
3821+
return AMDGPU::V_FMA_LEGACY_F32_e64;
3822+
case AMDGPU::V_FMAC_F16_e32:
3823+
case AMDGPU::V_FMAC_F16_e64:
3824+
case AMDGPU::V_FMAC_F16_fake16_e64:
3825+
// The F16 FMA is the only case that depends on the subtarget: the fake16
// form is used whenever the subtarget reports true16 instructions.
return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
3826+
: AMDGPU::V_FMA_F16_gfx9_e64;
3827+
case AMDGPU::V_FMAC_F32_e32:
3828+
case AMDGPU::V_FMAC_F32_e64:
3829+
return AMDGPU::V_FMA_F32_e64;
3830+
case AMDGPU::V_FMAC_F64_e32:
3831+
case AMDGPU::V_FMAC_F64_e64:
3832+
return AMDGPU::V_FMA_F64_e64;
3833+
default:
3834+
// Callers must pass only the MAC/FMAC opcodes handled above.
llvm_unreachable("invalid instruction");
3835+
}
3836+
}
3837+
38083838
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38093839
LiveVariables *LV,
38103840
LiveIntervals *LIS) const {
@@ -4040,14 +4070,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40404070
if (Src0Literal && !ST.hasVOP3Literal())
40414071
return nullptr;
40424072

4043-
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
4044-
: IsF64 ? AMDGPU::V_FMA_F64_e64
4045-
: IsLegacy
4046-
? AMDGPU::V_FMA_LEGACY_F32_e64
4047-
: AMDGPU::V_FMA_F32_e64
4048-
: IsF16 ? AMDGPU::V_MAD_F16_e64
4049-
: IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
4050-
: AMDGPU::V_MAD_F32_e64;
4073+
unsigned NewOpc = getNewFMAInst(ST, Opc);
4074+
40514075
if (pseudoToMCOpcode(NewOpc) == -1)
40524076
return nullptr;
40534077

@@ -9294,6 +9318,7 @@ static bool isRenamedInGFX9(int Opcode) {
92949318
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
92959319
case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
92969320
case AMDGPU::V_FMA_F16_gfx9_e64:
9321+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
92979322
case AMDGPU::V_INTERP_P2_F16:
92989323
case AMDGPU::V_MAD_F16_e64:
92999324
case AMDGPU::V_MAD_U16_e64:

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
455455
break;
456456
case AMDGPU::V_FMA_F16_e64:
457457
case AMDGPU::V_FMA_F16_gfx9_e64:
458+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
458459
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
459460
: AMDGPU::V_FMAAK_F16;
460461
break;
@@ -484,6 +485,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
484485
break;
485486
case AMDGPU::V_FMA_F16_e64:
486487
case AMDGPU::V_FMA_F16_gfx9_e64:
488+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
487489
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
488490
: AMDGPU::V_FMAMK_F16;
489491
break;
@@ -956,7 +958,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
956958
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
957959
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
958960
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
959-
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
961+
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
962+
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
960963
shrinkMadFma(MI);
961964
continue;
962965
}

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ let FPDPRounding = 1 in {
340340

341341
let SubtargetPredicate = isGFX9Plus in {
342342
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
343-
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
343+
defm V_FMA_F16_gfx9 : VOP3Inst_t16 <"v_fma_f16_gfx9", VOP_F16_F16_F16_F16, any_fma>;
344344
} // End SubtargetPredicate = isGFX9Plus
345345
} // End FPDPRounding = 1
346346

@@ -1708,7 +1708,7 @@ defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
17081708
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
17091709
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
17101710
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
1711-
defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
1711+
defm V_FMA_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x248, "v_fma_f16", "V_FMA_F16_gfx9">;
17121712
defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">;
17131713
defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">;
17141714
defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">;

llvm/test/CodeGen/AMDGPU/fma.f16.ll

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
66
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
77
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
8+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
9+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
810

911
declare half @llvm.fma.f16(half, half, half)
1012
declare half @llvm.maxnum.f16(half, half)
@@ -27,6 +29,16 @@ define half @test_fma(half %x, half %y, half %z) {
2729
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2830
; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
2931
; GFX11-NEXT: s_setpc_b64 s[30:31]
32+
;
33+
; GFX12-LABEL: test_fma:
34+
; GFX12: ; %bb.0:
35+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
36+
; GFX12-NEXT: s_wait_expcnt 0x0
37+
; GFX12-NEXT: s_wait_samplecnt 0x0
38+
; GFX12-NEXT: s_wait_bvhcnt 0x0
39+
; GFX12-NEXT: s_wait_kmcnt 0x0
40+
; GFX12-NEXT: v_fma_f16 v0, v0, v1, v2
41+
; GFX12-NEXT: s_setpc_b64 s[30:31]
3042
%r = call half @llvm.fma.f16(half %x, half %y, half %z)
3143
ret half %r
3244
}
@@ -50,6 +62,16 @@ define half @test_fmac(half %x, half %y, half %z) {
5062
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5163
; GFX11-NEXT: v_fmac_f16_e32 v0, v1, v2
5264
; GFX11-NEXT: s_setpc_b64 s[30:31]
65+
;
66+
; GFX12-LABEL: test_fmac:
67+
; GFX12: ; %bb.0:
68+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
69+
; GFX12-NEXT: s_wait_expcnt 0x0
70+
; GFX12-NEXT: s_wait_samplecnt 0x0
71+
; GFX12-NEXT: s_wait_bvhcnt 0x0
72+
; GFX12-NEXT: s_wait_kmcnt 0x0
73+
; GFX12-NEXT: v_fmac_f16_e32 v0, v1, v2
74+
; GFX12-NEXT: s_setpc_b64 s[30:31]
5375
%r = call half @llvm.fma.f16(half %y, half %z, half %x)
5476
ret half %r
5577
}
@@ -81,6 +103,16 @@ define half @test_fmaak(half %x, half %y, half %z) {
81103
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82104
; GFX11-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
83105
; GFX11-NEXT: s_setpc_b64 s[30:31]
106+
;
107+
; GFX12-LABEL: test_fmaak:
108+
; GFX12: ; %bb.0:
109+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
110+
; GFX12-NEXT: s_wait_expcnt 0x0
111+
; GFX12-NEXT: s_wait_samplecnt 0x0
112+
; GFX12-NEXT: s_wait_bvhcnt 0x0
113+
; GFX12-NEXT: s_wait_kmcnt 0x0
114+
; GFX12-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
115+
; GFX12-NEXT: s_setpc_b64 s[30:31]
84116
%r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200)
85117
ret half %r
86118
}
@@ -112,6 +144,16 @@ define half @test_fmamk(half %x, half %y, half %z) {
112144
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113145
; GFX11-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2
114146
; GFX11-NEXT: s_setpc_b64 s[30:31]
147+
;
148+
; GFX12-LABEL: test_fmamk:
149+
; GFX12: ; %bb.0:
150+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
151+
; GFX12-NEXT: s_wait_expcnt 0x0
152+
; GFX12-NEXT: s_wait_samplecnt 0x0
153+
; GFX12-NEXT: s_wait_bvhcnt 0x0
154+
; GFX12-NEXT: s_wait_kmcnt 0x0
155+
; GFX12-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2
156+
; GFX12-NEXT: s_setpc_b64 s[30:31]
115157
%r = call half @llvm.fma.f16(half %x, half 0xH4200, half %z)
116158
ret half %r
117159
}
@@ -193,6 +235,42 @@ define i32 @test_D139469_f16(half %arg) {
193235
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
194236
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
195237
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
238+
;
239+
; GFX12-SDAG-LABEL: test_D139469_f16:
240+
; GFX12-SDAG: ; %bb.0: ; %bb
241+
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
242+
; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
243+
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
244+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
245+
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
246+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0x211e
247+
; GFX12-SDAG-NEXT: v_mul_f16_e32 v2, 0x291e, v0
248+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
249+
; GFX12-SDAG-NEXT: v_fmac_f16_e32 v1, 0x291e, v0
250+
; GFX12-SDAG-NEXT: v_min_num_f16_e32 v0, v2, v1
251+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
252+
; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0
253+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
254+
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
255+
;
256+
; GFX12-GISEL-LABEL: test_D139469_f16:
257+
; GFX12-GISEL: ; %bb.0: ; %bb
258+
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
259+
; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
260+
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
261+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
262+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
263+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e
264+
; GFX12-GISEL-NEXT: v_mul_f16_e32 v2, 0x291e, v0
265+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
266+
; GFX12-GISEL-NEXT: v_fmac_f16_e32 v1, 0x291e, v0
267+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
268+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
269+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v1
270+
; GFX12-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0
271+
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
272+
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
273+
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
196274
bb:
197275
%i = fmul contract half %arg, 0xH291E
198276
%i1 = fcmp olt half %i, 0xH0000
@@ -306,6 +384,55 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) {
306384
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
307385
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
308386
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
387+
;
388+
; GFX12-SDAG-LABEL: test_D139469_v2f16:
389+
; GFX12-SDAG: ; %bb.0: ; %bb
390+
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
391+
; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
392+
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
393+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
394+
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
395+
; GFX12-SDAG-NEXT: s_movk_i32 s0, 0x211e
396+
; GFX12-SDAG-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1]
397+
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
398+
; GFX12-SDAG-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0]
399+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
400+
; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v1, v0
401+
; GFX12-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
402+
; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0
403+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
404+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3)
405+
; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1
406+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
407+
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
408+
;
409+
; GFX12-GISEL-LABEL: test_D139469_v2f16:
410+
; GFX12-GISEL: ; %bb.0: ; %bb
411+
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
412+
; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
413+
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
414+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
415+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
416+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e
417+
; GFX12-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0
418+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
419+
; GFX12-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1
420+
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2
421+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
422+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
423+
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
424+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v0
425+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s1, 0, v1
426+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
427+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s2, 0, v3
428+
; GFX12-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0
429+
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
430+
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
431+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
432+
; GFX12-GISEL-NEXT: s_or_b32 s0, s1, s2
433+
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
434+
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
435+
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
309436
bb:
310437
%i = fmul contract <2 x half> %arg, <half 0xH291E, half 0xH291E>
311438
%i1 = fcmp olt <2 x half> %i, <half 0xH0000, half 0xH0000>

llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ body: |
1818
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
1919
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
2020
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
21-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
21+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
2222
%0 = IMPLICIT_DEF
2323
%1 = COPY %0.sub1
2424
%2 = COPY %0.sub0
@@ -43,7 +43,7 @@ body: |
4343
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
4444
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
4545
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
46-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
46+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
4747
%0 = IMPLICIT_DEF
4848
%1 = COPY %0.sub1
4949
%2 = COPY %0.sub0
@@ -68,7 +68,7 @@ body: |
6868
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
6969
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
7070
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
71-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
71+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
7272
%0 = IMPLICIT_DEF
7373
%1 = COPY %0.sub0
7474
%2 = COPY %0.sub1
@@ -90,7 +90,7 @@ body: |
9090
; GFX11-NEXT: {{ $}}
9191
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
9292
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
93-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
93+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
9494
; GFX11-NEXT: S_ENDPGM 0
9595
%0:vgpr_32 = COPY killed $vgpr0
9696

0 commit comments

Comments
 (0)