Skip to content

Commit e60d658

Browse files
committed
AMDGPU/GlobalISel: Handle VOP3NoMods
1 parent d309b4e commit e60d658

File tree

5 files changed

+35
-16
lines changed

5 files changed

+35
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,10 @@ def gi_vop3mods :
3131
GIComplexOperandMatcher<s32, "selectVOP3Mods">,
3232
GIComplexPatternEquiv<VOP3Mods>;
3333

34+
// Ties the VOP3NoMods complex pattern to the "selectVOP3NoMods" matcher for
// s32 operands, mirroring the gi_vop3mods definition that precedes it.
def gi_vop3_no_mods :
35+
GIComplexOperandMatcher<s32, "selectVOP3NoMods">,
36+
GIComplexPatternEquiv<VOP3NoMods>;
37+
3438
def gi_vop3mods_nnan :
3539
GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">,
3640
GIComplexPatternEquiv<VOP3Mods_nnan>;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2218,6 +2218,18 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
22182218
}};
22192219
}
22202220

2221+
/// Accepts \p Root only when the instruction that defines its register
/// (found by looking through copies) is neither a G_FNEG nor a G_FABS.
///
/// \param Root operand whose register is inspected.
/// \returns an empty result when the definition is G_FNEG or G_FABS
/// (presumably treated as "no match" by the caller -- confirm against the
/// ComplexRendererFns contract); otherwise a single renderer that adds the
/// root register to the instruction being built.
InstructionSelector::ComplexRendererFns
2222+
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
2223+
Register Reg = Root.getReg();
2224+
// Copies are skipped only to find the defining opcode; Reg itself is unchanged.
const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
2225+
// Def may be null, so it is checked before the opcode is read.
if (Def && (Def->getOpcode() == AMDGPU::G_FNEG ||
2226+
Def->getOpcode() == AMDGPU::G_FABS))
2227+
return {};
2228+
return {{
2229+
// Reg is captured by value and emitted as a plain register operand.
[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
2230+
}};
2231+
}
2232+
22212233
InstructionSelector::ComplexRendererFns
22222234
AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
22232235
Register Src;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,9 @@ class AMDGPUInstructionSelector : public InstructionSelector {
142142
selectVOP3OMods(MachineOperand &Root) const;
143143
InstructionSelector::ComplexRendererFns
144144
selectVOP3Mods(MachineOperand &Root) const;
145+
146+
/// Matcher for an operand whose defining instruction (ignoring copies) is
/// not G_FNEG or G_FABS; yields an empty result for those two opcodes.
ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
147+
145148
InstructionSelector::ComplexRendererFns
146149
selectVOP3Mods_nnan(MachineOperand &Root) const;
147150

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -23,15 +23,15 @@ body: |
2323
; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2424
; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2525
; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
26-
; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
27-
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
26+
; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
27+
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
2828
; GFX10-LABEL: name: fma_f32
2929
; GFX10: $vcc_hi = IMPLICIT_DEF
3030
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3131
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3232
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
33-
; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
34-
; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
33+
; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
34+
; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
3535
%0:vgpr(s32) = COPY $vgpr0
3636
%1:vgpr(s32) = COPY $vgpr1
3737
%2:vgpr(s32) = COPY $vgpr2
@@ -60,15 +60,15 @@ body: |
6060
; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
6161
; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6262
; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
63-
; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
64-
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
63+
; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
64+
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
6565
; GFX10-LABEL: name: fma_f32_fneg_src0
6666
; GFX10: $vcc_hi = IMPLICIT_DEF
6767
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
6868
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6969
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
70-
; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
71-
; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
70+
; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
71+
; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
7272
%0:vgpr(s32) = COPY $vgpr0
7373
%1:vgpr(s32) = COPY $vgpr1
7474
%2:vgpr(s32) = COPY $vgpr2
@@ -98,15 +98,15 @@ body: |
9898
; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
9999
; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
100100
; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
101-
; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
102-
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
101+
; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
102+
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
103103
; GFX10-LABEL: name: fma_f32_fneg_src1
104104
; GFX10: $vcc_hi = IMPLICIT_DEF
105105
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
106106
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
107107
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
108-
; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
109-
; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
108+
; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
109+
; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
110110
%0:vgpr(s32) = COPY $vgpr0
111111
%1:vgpr(s32) = COPY $vgpr1
112112
%2:vgpr(s32) = COPY $vgpr2

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,15 @@ body: |
1616
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1818
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
19-
; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
20-
; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]]
19+
; GFX6: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
20+
; GFX6: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]]
2121
; GFX10-LABEL: name: fmad_f32
2222
; GFX10: $vcc_hi = IMPLICIT_DEF
2323
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2424
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2525
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
26-
; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
27-
; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]]
26+
; GFX10: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
27+
; GFX10: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]]
2828
%0:vgpr(s32) = COPY $vgpr0
2929
%1:vgpr(s32) = COPY $vgpr1
3030
%2:vgpr(s32) = COPY $vgpr2

0 commit comments

Comments
 (0)