Skip to content

Commit 0205806

Browse files
committed
[AMDGPU] Convert mac/fmac to mad/fma when folding output modifiers
Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac instruction, so we might as well convert it to the more flexible VOP3-only mad/fma form. With this change, the only way we should emit VOP3-encoded mac/fmac is if regalloc chooses registers that require the VOP3 encoding, e.g. sgprs for both src0 and src1. In all other cases the mac/fmac should either be converted to mad/fma or shrunk to VOP2 encoding. Differential Revision: https://reviews.llvm.org/D110156
1 parent 3828ea6 commit 0205806

File tree

3 files changed

+18
-3
lines changed

3 files changed

+18
-3
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,14 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
13881388
DefClamp->setImm(1);
13891389
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
13901390
MI.eraseFromParent();
1391+
1392+
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
1393+
// instruction, so we might as well convert it to the more flexible VOP3-only
1394+
// mad/fma form.
1395+
MachineFunction::iterator MBBI = Def->getParent()->getIterator();
1396+
if (MachineInstr *NewMI = TII->convertToThreeAddress(MBBI, *Def, nullptr))
1397+
Def->eraseFromParent();
1398+
13911399
return true;
13921400
}
13931401

@@ -1526,6 +1534,14 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
15261534
DefOMod->setImm(OMod);
15271535
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
15281536
MI.eraseFromParent();
1537+
1538+
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
1539+
// instruction, so we might as well convert it to the more flexible VOP3-only
1540+
// mad/fma form.
1541+
MachineFunction::iterator MBBI = Def->getParent()->getIterator();
1542+
if (MachineInstr *NewMI = TII->convertToThreeAddress(MBBI, *Def, nullptr))
1543+
Def->eraseFromParent();
1544+
15291545
return true;
15301546
}
15311547

llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr
7070
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
7171
; GFX9-NEXT: s_setpc_b64
7272

73-
; CIVI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
73+
; CIVI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
7474
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 {
7575
%src0.ext = fpext half %src0 to float
7676
%src1.ext = fpext half %src1 to float

llvm/test/CodeGen/AMDGPU/mad-mix.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,8 +328,7 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
328328
; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
329329
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
330330
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
331-
; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
332-
; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
331+
; CIVI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
333332
define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
334333
%src0.hi = extractelement <2 x half> %src0, i32 1
335334
%src1.hi = extractelement <2 x half> %src1, i32 1

0 commit comments

Comments
 (0)