Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 3d641da

Browse files
committed
[PowerPC] Improvements for BUILD_VECTOR Vol. 4
This is the final patch in the series of patches that improve BUILD_VECTOR handling on PowerPC. It adds a few peephole optimizations to remove redundant instructions. It also adds a large test case which encompasses a large set of code patterns that build vectors - this test case was the motivator for this series of patches.

Differential Revision: https://reviews.llvm.org/D26066

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288800 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a0010d9 commit 3d641da

File tree

5 files changed

+5002
-49
lines changed

5 files changed

+5002
-49
lines changed

lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -844,19 +844,9 @@ let Uses = [RM] in {
844844
def XXPERMDI : XX3Form_2<60, 10,
845845
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
846846
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
847-
let isCodeGenOnly = 1 in {
848-
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA, u2imm:$DM),
847+
let isCodeGenOnly = 1 in
848+
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
849849
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
850-
let D = 0 in
851-
def XXSPLTD0s : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA),
852-
"xxspltd $XT, $XA, 0", IIC_VecPerm, []>;
853-
let D = 1 in
854-
def XXSPLTD1s : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA),
855-
"xxspltd $XT, $XA, 1", IIC_VecPerm, []>;
856-
let D = 2 in
857-
def XXSWAPDs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA),
858-
"xxswapd $XT, $XA", IIC_VecPerm, []>;
859-
}
860850
def XXSEL : XX4Form<60, 3,
861851
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
862852
"xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
@@ -928,6 +918,12 @@ def : InstAlias<"xxmrgld $XT, $XA, $XB",
928918
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
929919
def : InstAlias<"xxswapd $XT, $XB",
930920
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
921+
def : InstAlias<"xxspltd $XT, $XB, 0",
922+
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
923+
def : InstAlias<"xxspltd $XT, $XB, 1",
924+
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
925+
def : InstAlias<"xxswapd $XT, $XB",
926+
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
931927

932928
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
933929

@@ -2510,23 +2506,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
25102506
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
25112507
(v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
25122508
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
2513-
(v2i64 (XXSPLTD0s (LXSIBZX xoaddr:$src)))>;
2509+
(v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
25142510
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
25152511
(v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
25162512
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
2517-
(v2i64 (XXSPLTD0s (VEXTSB2Ds (LXSIBZX xoaddr:$src))))>;
2513+
(v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
25182514

25192515
// Build vectors from i16 loads
25202516
def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
25212517
(v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
25222518
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
25232519
(v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
25242520
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
2525-
(v2i64 (XXSPLTD0s (LXSIHZX xoaddr:$src)))>;
2521+
(v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
25262522
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
25272523
(v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
25282524
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
2529-
(v2i64 (XXSPLTD0s (VEXTSH2Ds (LXSIHZX xoaddr:$src))))>;
2525+
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
25302526

25312527
let Predicates = [IsBigEndian, HasP9Vector] in {
25322528
// Scalar stores of i8
@@ -2760,9 +2756,11 @@ let AddedComplexity = 400 in {
27602756

27612757
// Build vectors of floating point converted to i64.
27622758
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
2763-
(v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
2759+
(v2i64 (XXPERMDIs
2760+
(COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
27642761
def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
2765-
(v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
2762+
(v2i64 (XXPERMDIs
2763+
(COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
27662764
def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
27672765
(v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
27682766
def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),

lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 110 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,40 @@ bool PPCMIPeephole::simplifyCode(void) {
124124
if (TrueReg1 == TrueReg2
125125
&& TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
126126
MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
127+
unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
128+
129+
// If this is a splat fed by a splatting load, the splat is
130+
// redundant. Replace with a copy. This doesn't happen directly due
131+
// to code in PPCDAGToDAGISel.cpp, but it can happen when converting
132+
// a load of a double to a vector of 64-bit integers.
133+
auto isConversionOfLoadAndSplat = [=]() -> bool {
134+
if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
135+
return false;
136+
unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
137+
if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
138+
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
139+
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
140+
return true;
141+
}
142+
return false;
143+
};
144+
if (DefMI && (Immed == 0 || Immed == 3)) {
145+
if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
146+
DEBUG(dbgs()
147+
<< "Optimizing load-and-splat/splat "
148+
"to load-and-splat/copy: ");
149+
DEBUG(MI.dump());
150+
BuildMI(MBB, &MI, MI.getDebugLoc(),
151+
TII->get(PPC::COPY), MI.getOperand(0).getReg())
152+
.addOperand(MI.getOperand(1));
153+
ToErase = &MI;
154+
Simplified = true;
155+
}
156+
}
127157

128158
// If this is a splat or a swap fed by another splat, we
129159
// can replace it with a copy.
130-
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
160+
if (DefOpc == PPC::XXPERMDI) {
131161
unsigned FeedImmed = DefMI->getOperand(3).getImm();
132162
unsigned FeedReg1
133163
= lookThruCopyLike(DefMI->getOperand(1).getReg());
@@ -170,8 +200,9 @@ bool PPCMIPeephole::simplifyCode(void) {
170200
ToErase = &MI;
171201
Simplified = true;
172202
}
173-
} else if ((Immed == 0 || Immed == 3) &&
174-
DefMI && DefMI->getOpcode() == PPC::XXPERMDIs) {
203+
} else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
204+
(DefMI->getOperand(2).getImm() == 0 ||
205+
DefMI->getOperand(2).getImm() == 3)) {
175206
// Splat fed by another splat - switch the output of the first
176207
// and remove the second.
177208
DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
@@ -190,17 +221,32 @@ bool PPCMIPeephole::simplifyCode(void) {
190221
unsigned MyOpcode = MI.getOpcode();
191222
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
192223
unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
224+
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
225+
break;
193226
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
194227
if (!DefMI)
195228
break;
196229
unsigned DefOpcode = DefMI->getOpcode();
197-
bool SameOpcode = (MyOpcode == DefOpcode) ||
230+
auto isConvertOfSplat = [=]() -> bool {
231+
if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
232+
return false;
233+
unsigned ConvReg = DefMI->getOperand(1).getReg();
234+
if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
235+
return false;
236+
MachineInstr *Splt = MRI->getVRegDef(ConvReg);
237+
return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
238+
Splt->getOpcode() == PPC::XXSPLTW);
239+
};
240+
bool AlreadySplat = (MyOpcode == DefOpcode) ||
198241
(MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
199242
(MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
200-
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs);
201-
// Splat fed by another splat - switch the output of the first
202-
// and remove the second.
203-
if (SameOpcode) {
243+
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
244+
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
245+
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
246+
(MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
247+
// If the instruction[s] that feed this splat have already splat
248+
// the value, this splat is redundant.
249+
if (AlreadySplat) {
204250
DEBUG(dbgs() << "Changing redundant splat to a copy: ");
205251
DEBUG(MI.dump());
206252
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
@@ -234,9 +280,64 @@ bool PPCMIPeephole::simplifyCode(void) {
234280
}
235281
break;
236282
}
283+
case PPC::XVCVDPSP: {
284+
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
285+
unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
286+
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
287+
break;
288+
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
289+
290+
// This can occur when building a vector of single precision or integer
291+
// values.
292+
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
293+
unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
294+
unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
295+
if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
296+
!TargetRegisterInfo::isVirtualRegister(DefsReg2))
297+
break;
298+
MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
299+
MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
300+
301+
if (!P1 || !P2)
302+
break;
303+
304+
// Remove the passed FRSP instruction if it only feeds this MI and
305+
// set any uses of that FRSP (in this MI) to the source of the FRSP.
306+
auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
307+
if (RoundInstr->getOpcode() == PPC::FRSP &&
308+
MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
309+
Simplified = true;
310+
unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
311+
unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
312+
MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
313+
for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
314+
if (Use.getOperand(i).isReg() &&
315+
Use.getOperand(i).getReg() == FRSPDefines)
316+
Use.getOperand(i).setReg(ConvReg1);
317+
DEBUG(dbgs() << "Removing redundant FRSP:\n");
318+
DEBUG(RoundInstr->dump());
319+
DEBUG(dbgs() << "As it feeds instruction:\n");
320+
DEBUG(MI.dump());
321+
DEBUG(dbgs() << "Through instruction:\n");
322+
DEBUG(DefMI->dump());
323+
RoundInstr->eraseFromParent();
324+
}
325+
};
326+
327+
// If the input to XVCVDPSP is a vector that was built (even
328+
// partially) out of FRSP's, the FRSP(s) can safely be removed
329+
// since this instruction performs the same operation.
330+
if (P1 != P2) {
331+
removeFRSPIfPossible(P1);
332+
removeFRSPIfPossible(P2);
333+
break;
334+
}
335+
removeFRSPIfPossible(P1);
336+
}
337+
break;
338+
}
237339
}
238340
}
239-
240341
// If the last instruction was marked for elimination,
241342
// remove it now.
242343
if (ToErase) {

0 commit comments

Comments
 (0)