Skip to content

Commit 6137af9

Browse files
committed
V_FMA_F16 true16 in MC
1 parent 4ad0fdd commit 6137af9

18 files changed

+1072
-320
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ static unsigned macToMad(unsigned Opc) {
199199
case AMDGPU::V_FMAC_F16_e64:
200200
return AMDGPU::V_FMA_F16_gfx9_e64;
201201
case AMDGPU::V_FMAC_F16_fake16_e64:
202-
return AMDGPU::V_FMA_F16_gfx9_e64;
202+
return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
203203
case AMDGPU::V_FMAC_LEGACY_F32_e64:
204204
return AMDGPU::V_FMA_LEGACY_F32_e64;
205205
case AMDGPU::V_FMAC_F64_e64:

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3798,6 +3798,36 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
37983798
}
37993799
}
38003800

3801+
/// Return the _e64 MAD/FMA opcode that corresponds to the MAC/FMAC opcode
/// \p Opc. Both the _e32 and the _e64 form of each MAC/FMAC opcode map to the
/// same result. convertToThreeAddress() uses the result as the opcode of the
/// replacement instruction.
///
/// \param ST  Subtarget; only queried to choose between the fake16 and the
///            plain variant of the F16 FMA opcode.
/// \param Opc Opcode to translate; must be one of the cases listed below.
/// \returns the replacement opcode. Any opcode not listed is a caller bug and
///          reaches llvm_unreachable.
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
3802+
switch (Opc) {
3803+
case AMDGPU::V_MAC_F16_e32:
3804+
case AMDGPU::V_MAC_F16_e64:
3805+
return AMDGPU::V_MAD_F16_e64;
3806+
case AMDGPU::V_MAC_F32_e32:
3807+
case AMDGPU::V_MAC_F32_e64:
3808+
return AMDGPU::V_MAD_F32_e64;
3809+
case AMDGPU::V_MAC_LEGACY_F32_e32:
3810+
case AMDGPU::V_MAC_LEGACY_F32_e64:
3811+
return AMDGPU::V_MAD_LEGACY_F32_e64;
3812+
case AMDGPU::V_FMAC_LEGACY_F32_e32:
3813+
case AMDGPU::V_FMAC_LEGACY_F32_e64:
3814+
return AMDGPU::V_FMA_LEGACY_F32_e64;
3815+
case AMDGPU::V_FMAC_F16_e32:
3816+
case AMDGPU::V_FMAC_F16_e64:
3817+
case AMDGPU::V_FMAC_F16_fake16_e64:
3818+
// The F16 FMA result depends on the subtarget rather than on which of the
// three source opcodes was seen: subtargets reporting true16 instructions
// get the fake16 variant, all others get the plain gfx9 opcode.
return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
3819+
: AMDGPU::V_FMA_F16_gfx9_e64;
3820+
case AMDGPU::V_FMAC_F32_e32:
3821+
case AMDGPU::V_FMAC_F32_e64:
3822+
return AMDGPU::V_FMA_F32_e64;
3823+
case AMDGPU::V_FMAC_F64_e32:
3824+
case AMDGPU::V_FMAC_F64_e64:
3825+
return AMDGPU::V_FMA_F64_e64;
3826+
default:
3827+
// Callers are expected to pass only the MAC/FMAC opcodes handled above.
llvm_unreachable("invalid instruction");
3828+
}
3829+
}
3830+
38013831
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38023832
LiveVariables *LV,
38033833
LiveIntervals *LIS) const {
@@ -4033,14 +4063,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40334063
if (Src0Literal && !ST.hasVOP3Literal())
40344064
return nullptr;
40354065

4036-
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
4037-
: IsF64 ? AMDGPU::V_FMA_F64_e64
4038-
: IsLegacy
4039-
? AMDGPU::V_FMA_LEGACY_F32_e64
4040-
: AMDGPU::V_FMA_F32_e64
4041-
: IsF16 ? AMDGPU::V_MAD_F16_e64
4042-
: IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
4043-
: AMDGPU::V_MAD_F32_e64;
4066+
unsigned NewOpc = getNewFMAInst(ST, Opc);
4067+
40444068
if (pseudoToMCOpcode(NewOpc) == -1)
40454069
return nullptr;
40464070

@@ -9286,6 +9310,7 @@ static bool isRenamedInGFX9(int Opcode) {
92869310
//
92879311
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
92889312
case AMDGPU::V_FMA_F16_gfx9_e64:
9313+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
92899314
case AMDGPU::V_INTERP_P2_F16:
92909315
case AMDGPU::V_MAD_F16_e64:
92919316
case AMDGPU::V_MAD_U16_e64:

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
455455
break;
456456
case AMDGPU::V_FMA_F16_e64:
457457
case AMDGPU::V_FMA_F16_gfx9_e64:
458+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
458459
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
459460
: AMDGPU::V_FMAAK_F16;
460461
break;
@@ -484,6 +485,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
484485
break;
485486
case AMDGPU::V_FMA_F16_e64:
486487
case AMDGPU::V_FMA_F16_gfx9_e64:
488+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
487489
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
488490
: AMDGPU::V_FMAMK_F16;
489491
break;
@@ -956,7 +958,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
956958
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
957959
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
958960
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
959-
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
961+
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
962+
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
960963
shrinkMadFma(MI);
961964
continue;
962965
}

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ let FPDPRounding = 1 in {
341341
let SubtargetPredicate = isGFX9Plus in {
342342
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
343343
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
344-
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
344+
defm V_FMA_F16_gfx9 : VOP3Inst_t16 <"v_fma_f16_gfx9", VOP_F16_F16_F16_F16, any_fma>;
345345
} // End SubtargetPredicate = isGFX9Plus
346346
} // End FPDPRounding = 1
347347

@@ -1709,7 +1709,7 @@ defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
17091709
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
17101710
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
17111711
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
1712-
defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
1712+
defm V_FMA_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x248, "v_fma_f16", "V_FMA_F16_gfx9">;
17131713
defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">;
17141714
defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">;
17151715
defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">;

llvm/test/CodeGen/AMDGPU/fma.f16.ll

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
66
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
77
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
8+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
9+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
810

911
declare half @llvm.fma.f16(half, half, half)
1012
declare half @llvm.maxnum.f16(half, half)
@@ -27,6 +29,16 @@ define half @test_fma(half %x, half %y, half %z) {
2729
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2830
; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
2931
; GFX11-NEXT: s_setpc_b64 s[30:31]
32+
;
33+
; GFX12-LABEL: test_fma:
34+
; GFX12: ; %bb.0:
35+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
36+
; GFX12-NEXT: s_wait_expcnt 0x0
37+
; GFX12-NEXT: s_wait_samplecnt 0x0
38+
; GFX12-NEXT: s_wait_bvhcnt 0x0
39+
; GFX12-NEXT: s_wait_kmcnt 0x0
40+
; GFX12-NEXT: v_fma_f16 v0, v0, v1, v2
41+
; GFX12-NEXT: s_setpc_b64 s[30:31]
3042
%r = call half @llvm.fma.f16(half %x, half %y, half %z)
3143
ret half %r
3244
}
@@ -50,6 +62,16 @@ define half @test_fmac(half %x, half %y, half %z) {
5062
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5163
; GFX11-NEXT: v_fmac_f16_e32 v0, v1, v2
5264
; GFX11-NEXT: s_setpc_b64 s[30:31]
65+
;
66+
; GFX12-LABEL: test_fmac:
67+
; GFX12: ; %bb.0:
68+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
69+
; GFX12-NEXT: s_wait_expcnt 0x0
70+
; GFX12-NEXT: s_wait_samplecnt 0x0
71+
; GFX12-NEXT: s_wait_bvhcnt 0x0
72+
; GFX12-NEXT: s_wait_kmcnt 0x0
73+
; GFX12-NEXT: v_fmac_f16_e32 v0, v1, v2
74+
; GFX12-NEXT: s_setpc_b64 s[30:31]
5375
%r = call half @llvm.fma.f16(half %y, half %z, half %x)
5476
ret half %r
5577
}
@@ -81,6 +103,16 @@ define half @test_fmaak(half %x, half %y, half %z) {
81103
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82104
; GFX11-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
83105
; GFX11-NEXT: s_setpc_b64 s[30:31]
106+
;
107+
; GFX12-LABEL: test_fmaak:
108+
; GFX12: ; %bb.0:
109+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
110+
; GFX12-NEXT: s_wait_expcnt 0x0
111+
; GFX12-NEXT: s_wait_samplecnt 0x0
112+
; GFX12-NEXT: s_wait_bvhcnt 0x0
113+
; GFX12-NEXT: s_wait_kmcnt 0x0
114+
; GFX12-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
115+
; GFX12-NEXT: s_setpc_b64 s[30:31]
84116
%r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200)
85117
ret half %r
86118
}
@@ -112,6 +144,16 @@ define half @test_fmamk(half %x, half %y, half %z) {
112144
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113145
; GFX11-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2
114146
; GFX11-NEXT: s_setpc_b64 s[30:31]
147+
;
148+
; GFX12-LABEL: test_fmamk:
149+
; GFX12: ; %bb.0:
150+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
151+
; GFX12-NEXT: s_wait_expcnt 0x0
152+
; GFX12-NEXT: s_wait_samplecnt 0x0
153+
; GFX12-NEXT: s_wait_bvhcnt 0x0
154+
; GFX12-NEXT: s_wait_kmcnt 0x0
155+
; GFX12-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2
156+
; GFX12-NEXT: s_setpc_b64 s[30:31]
115157
%r = call half @llvm.fma.f16(half %x, half 0xH4200, half %z)
116158
ret half %r
117159
}
@@ -193,6 +235,42 @@ define i32 @test_D139469_f16(half %arg) {
193235
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
194236
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
195237
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
238+
;
239+
; GFX12-SDAG-LABEL: test_D139469_f16:
240+
; GFX12-SDAG: ; %bb.0: ; %bb
241+
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
242+
; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
243+
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
244+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
245+
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
246+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0x211e
247+
; GFX12-SDAG-NEXT: v_mul_f16_e32 v2, 0x291e, v0
248+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
249+
; GFX12-SDAG-NEXT: v_fmac_f16_e32 v1, 0x291e, v0
250+
; GFX12-SDAG-NEXT: v_min_num_f16_e32 v0, v2, v1
251+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
252+
; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0
253+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
254+
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
255+
;
256+
; GFX12-GISEL-LABEL: test_D139469_f16:
257+
; GFX12-GISEL: ; %bb.0: ; %bb
258+
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
259+
; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
260+
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
261+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
262+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
263+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e
264+
; GFX12-GISEL-NEXT: v_mul_f16_e32 v2, 0x291e, v0
265+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
266+
; GFX12-GISEL-NEXT: v_fmac_f16_e32 v1, 0x291e, v0
267+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
268+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
269+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v1
270+
; GFX12-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0
271+
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
272+
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
273+
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
196274
bb:
197275
%i = fmul contract half %arg, 0xH291E
198276
%i1 = fcmp olt half %i, 0xH0000
@@ -306,6 +384,55 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) {
306384
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
307385
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
308386
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
387+
;
388+
; GFX12-SDAG-LABEL: test_D139469_v2f16:
389+
; GFX12-SDAG: ; %bb.0: ; %bb
390+
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
391+
; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
392+
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
393+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
394+
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
395+
; GFX12-SDAG-NEXT: s_movk_i32 s0, 0x211e
396+
; GFX12-SDAG-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1]
397+
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
398+
; GFX12-SDAG-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0]
399+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
400+
; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v1, v0
401+
; GFX12-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
402+
; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0
403+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
404+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3)
405+
; GFX12-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1
406+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
407+
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
408+
;
409+
; GFX12-GISEL-LABEL: test_D139469_v2f16:
410+
; GFX12-GISEL: ; %bb.0: ; %bb
411+
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
412+
; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
413+
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
414+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
415+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
416+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e
417+
; GFX12-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0
418+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
419+
; GFX12-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1
420+
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2
421+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
422+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
423+
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
424+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v0
425+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s1, 0, v1
426+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
427+
; GFX12-GISEL-NEXT: v_cmp_gt_f16_e64 s2, 0, v3
428+
; GFX12-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0
429+
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
430+
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
431+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
432+
; GFX12-GISEL-NEXT: s_or_b32 s0, s1, s2
433+
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
434+
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
435+
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
309436
bb:
310437
%i = fmul contract <2 x half> %arg, <half 0xH291E, half 0xH291E>
311438
%i1 = fcmp olt <2 x half> %i, <half 0xH0000, half 0xH0000>

llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ body: |
1818
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
1919
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
2020
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
21-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
21+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
2222
%0 = IMPLICIT_DEF
2323
%1 = COPY %0.sub1
2424
%2 = COPY %0.sub0
@@ -43,7 +43,7 @@ body: |
4343
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
4444
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
4545
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
46-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
46+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
4747
%0 = IMPLICIT_DEF
4848
%1 = COPY %0.sub1
4949
%2 = COPY %0.sub0
@@ -68,7 +68,7 @@ body: |
6868
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
6969
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
7070
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
71-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
71+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
7272
%0 = IMPLICIT_DEF
7373
%1 = COPY %0.sub0
7474
%2 = COPY %0.sub1
@@ -90,7 +90,7 @@ body: |
9090
; GFX11-NEXT: {{ $}}
9191
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
9292
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
93-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
93+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
9494
; GFX11-NEXT: S_ENDPGM 0
9595
%0:vgpr_32 = COPY killed $vgpr0
9696

0 commit comments

Comments
 (0)