Skip to content

Commit 1e8c17e

Browse files
authored
[AMDGPU] Allow folding to FMAMK with SGPR and immediate operand on GFX10+ (#72258)
Allow foldImmediate to create instructions like:

    v_fmamk_f32 v0, s0, 0x42000000, v0

This instruction has two "scalar values": s0 and 0x42000000. On GFX10+ this is allowed. This fold was originally implemented before the compiler supported GFX10, when all ASICs were limited to one scalar value.
1 parent 767b342 commit 1e8c17e

File tree

2 files changed

+31
-8
lines changed

2 files changed

+31
-8
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3445,8 +3445,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34453445
(Src1->isReg() && Src1->getReg() == Reg)) {
34463446
MachineOperand *RegSrc =
34473447
Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
3448-
if (!RegSrc->isReg() ||
3449-
RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())))
3448+
if (!RegSrc->isReg())
3449+
return false;
3450+
if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
3451+
ST.getConstantBusLimit(Opc) < 2)
34503452
return false;
34513453

34523454
if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll

Lines changed: 27 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -715,12 +715,33 @@ define amdgpu_ps i32 @s_mul_fma_32_f32(float inreg %x, float inreg %y) {
715715
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
716716
; GFX9-NEXT: ; return to shader part epilog
717717
;
718-
; GFX1011-LABEL: s_mul_fma_32_f32:
719-
; GFX1011: ; %bb.0:
720-
; GFX1011-NEXT: v_mov_b32_e32 v0, s1
721-
; GFX1011-NEXT: v_fmac_f32_e64 v0, 0x42000000, s0
722-
; GFX1011-NEXT: v_readfirstlane_b32 s0, v0
723-
; GFX1011-NEXT: ; return to shader part epilog
718+
; GFX10-SDAG-LABEL: s_mul_fma_32_f32:
719+
; GFX10-SDAG: ; %bb.0:
720+
; GFX10-SDAG-NEXT: v_mov_b32_e32 v0, s1
721+
; GFX10-SDAG-NEXT: v_fmamk_f32 v0, s0, 0x42000000, v0
722+
; GFX10-SDAG-NEXT: v_readfirstlane_b32 s0, v0
723+
; GFX10-SDAG-NEXT: ; return to shader part epilog
724+
;
725+
; GFX10-GISEL-LABEL: s_mul_fma_32_f32:
726+
; GFX10-GISEL: ; %bb.0:
727+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s1
728+
; GFX10-GISEL-NEXT: v_fmac_f32_e64 v0, 0x42000000, s0
729+
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0
730+
; GFX10-GISEL-NEXT: ; return to shader part epilog
731+
;
732+
; GFX11-SDAG-LABEL: s_mul_fma_32_f32:
733+
; GFX11-SDAG: ; %bb.0:
734+
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s1
735+
; GFX11-SDAG-NEXT: v_fmamk_f32 v0, s0, 0x42000000, v0
736+
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
737+
; GFX11-SDAG-NEXT: ; return to shader part epilog
738+
;
739+
; GFX11-GISEL-LABEL: s_mul_fma_32_f32:
740+
; GFX11-GISEL: ; %bb.0:
741+
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s1
742+
; GFX11-GISEL-NEXT: v_fmac_f32_e64 v0, 0x42000000, s0
743+
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0
744+
; GFX11-GISEL-NEXT: ; return to shader part epilog
724745
%mul = fmul contract float %x, 32.0
725746
%fma = fadd contract float %mul, %y
726747
%cast = bitcast float %fma to i32

0 commit comments

Comments
 (0)