@@ -235,10 +235,6 @@ static bool isNarrowStore(unsigned Opc) {
   }
 }
 
-static bool isNarrowStore(MachineInstr *MI) {
-  return isNarrowStore(MI->getOpcode());
-}
-
 static bool isNarrowLoad(unsigned Opc) {
   switch (Opc) {
   default:
@@ -386,6 +382,10 @@ static unsigned getMatchingWideOpcode(unsigned Opc) {
     return AArch64::STURHHi;
   case AArch64::STURHHi:
     return AArch64::STURWi;
+  case AArch64::STURWi:
+    return AArch64::STURXi;
+  case AArch64::STRWui:
+    return AArch64::STRXui;
   case AArch64::LDRHHui:
   case AArch64::LDRSHWui:
     return AArch64::LDRWui;
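
For orientation, the two new cases extend the opcode-widening chain so that a pair of 32-bit zero stores can be rewritten as one 64-bit store. A minimal standalone sketch of the chain for the scaled store variants (the enum values are hypothetical stand-ins; the real getMatchingWideOpcode covers loads and unscaled forms as well):

#include <cstdio>

// Hypothetical stand-ins for the AArch64 scaled store opcodes in the chain.
enum Opcode { STRBBui, STRHHui, STRWui, STRXui, Unknown };

// Mirrors the shape of the widening switch: byte -> halfword -> word ->
// doubleword. The word -> doubleword step is what this commit adds.
Opcode getMatchingWideOpcodeSketch(Opcode Opc) {
  switch (Opc) {
  case STRBBui: return STRHHui;
  case STRHHui: return STRWui;
  case STRWui:  return STRXui; // new: two 'str wzr' become one 'str xzr'
  default:      return Unknown;
  }
}

int main() {
  std::printf("%d\n", getMatchingWideOpcodeSketch(STRWui) == STRXui); // 1
}
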
@@ -640,6 +640,16 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
          (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
 }
 
+static bool isPromotableZeroStoreOpcode(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
+}
+
+static bool isPromotableZeroStoreInst(MachineInstr *MI) {
+  return (isPromotableZeroStoreOpcode(MI)) &&
+         getLdStRegOp(MI).getReg() == AArch64::WZR;
+}
+
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
                                       MachineBasicBlock::iterator MergeMI,
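
The split into two helpers matters later in the pass: the opcode-level check gates which instructions are worth scanning at all, while the instruction-level check additionally confirms the stored value is the zero register. A rough standalone sketch of that layering, with stubbed stand-ins for the LLVM types (all names and values here are hypothetical, for illustration only):

#include <cstdio>

// Stubbed stand-ins for LLVM machinery; illustration only.
enum Opcode { STRBBui, STRHHui, STRWui, STURWi };
enum Reg { W0, WZR };
struct MachineInstr { Opcode Opc; Reg StoredReg; };

bool isNarrowStore(Opcode Opc) { return Opc == STRBBui || Opc == STRHHui; }

// Opcode-level filter: the *kind* of store that might be promotable.
bool isPromotableZeroStoreOpcode(const MachineInstr &MI) {
  return isNarrowStore(MI.Opc) || MI.Opc == STRWui || MI.Opc == STURWi;
}

// Instruction-level filter: must actually store the zero register.
bool isPromotableZeroStoreInst(const MachineInstr &MI) {
  return isPromotableZeroStoreOpcode(MI) && MI.StoredReg == WZR;
}

int main() {
  MachineInstr ZeroStore{STRWui, WZR}, NonZeroStore{STRWui, W0};
  std::printf("%d %d\n", isPromotableZeroStoreInst(ZeroStore),     // 1
              isPromotableZeroStoreInst(NonZeroStore));            // 0
}
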
@@ -775,12 +785,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
     MergeMI->eraseFromParent();
     return NextI;
   }
-  assert(isNarrowStore(Opc) && "Expected narrow store");
+  assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
 
   // Construct the new instruction.
   MachineInstrBuilder MIB;
   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
-            .addOperand(getLdStRegOp(I))
+            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
             .addOperand(BaseRegOp)
             .addImm(OffsetImm)
             .setMemRefs(I->mergeMemRefsWith(*MergeMI));
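
The .addReg change encodes the width-dependent register choice: if the input opcode was a narrow (byte or halfword) store, the widened result is at most 32 bits and still stores through WZR; if the inputs were 32-bit stores, the merged 64-bit store must name XZR instead. A minimal sketch of that decision (stand-in enum; real code uses AArch64::WZR / AArch64::XZR):

#include <cassert>

enum Reg { WZR, XZR }; // illustrative stand-ins for the zero registers

// Sketch of the source-register choice for the merged zero store.
Reg zeroSourceReg(bool InputWasNarrowStore) {
  return InputWasNarrowStore ? WZR : XZR;
}

int main() {
  assert(zeroSourceReg(true) == WZR);  // strh wzr + strh wzr -> str wzr
  assert(zeroSourceReg(false) == XZR); // str wzr  + str wzr  -> str xzr
}
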
@@ -1211,7 +1221,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
   int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
-  bool IsNarrowStore = isNarrowStore(Opc);
+  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
 
   // Track which registers have been modified and used between the first insn
   // (inclusive) and the second insn.
@@ -1282,7 +1292,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
         continue;
       }
 
-      if (IsNarrowLoad || IsNarrowStore) {
+      if (IsNarrowLoad || IsPromotableZeroStore) {
         // If the alignment requirements of the scaled wide load/store
        // instruction can't express the offset of the scaled narrow
        // input, bail and keep looking.
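
A concrete instance of the bail-out guarded by this check: scaled AArch64 store immediates are encoded in units of the access size, so a widened 64-bit STRXui can only reach byte offsets that are multiples of 8. A small sketch of that divisibility constraint (a simplification of the pass's actual alignment check, which also handles unscaled forms differently):

#include <cassert>

// Can a scaled wide store express the byte offset of the merged access?
// Scaled immediates count in units of the access size, so the byte offset
// must divide evenly by the widened size (simplified illustration).
bool wideOffsetExpressible(int ByteOffset, int WideSizeInBytes) {
  return ByteOffset % WideSizeInBytes == 0;
}

int main() {
  assert(wideOffsetExpressible(0, 8));  // str wzr,[x0] + str wzr,[x0,#4]: ok
  assert(!wideOffsetExpressible(4, 8)); // stores at #4 and #8: bail
}
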
@@ -1307,7 +1317,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
         // For narrow stores, allow only when the stored value is the same
         // (i.e., WZR).
         if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
-            (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
+            (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
           trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
           MemInsns.push_back(MI);
           continue;
@@ -1633,24 +1643,27 @@ bool AArch64LoadStoreOpt::isCandidateToMergeOrPair(MachineInstr *MI) {
 // store.
 bool AArch64LoadStoreOpt::tryToMergeLdStInst(
     MachineBasicBlock::iterator &MBBI) {
-  assert((isNarrowLoad(MBBI) || isNarrowStore(MBBI)) && "Expected narrow op.");
+  assert((isNarrowLoad(MBBI) || isPromotableZeroStoreOpcode(MBBI)) &&
+         "Expected narrow op.");
   MachineInstr *MI = MBBI;
   MachineBasicBlock::iterator E = MI->getParent()->end();
 
   if (!isCandidateToMergeOrPair(MI))
     return false;
 
-  // For narrow stores, find only the case where the stored value is WZR.
-  if (isNarrowStore(MI) && getLdStRegOp(MI).getReg() != AArch64::WZR)
+  // For promotable zero stores, the stored value should be WZR.
+  if (isPromotableZeroStoreOpcode(MI) &&
+      getLdStRegOp(MI).getReg() != AArch64::WZR)
     return false;
 
   // Look ahead up to LdStLimit instructions for a mergable instruction.
   LdStPairFlags Flags;
-  MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit);
+  MachineBasicBlock::iterator MergeMI =
+      findMatchingInsn(MBBI, Flags, LdStLimit);
   if (MergeMI != E) {
     if (isNarrowLoad(MI)) {
       ++NumNarrowLoadsPromoted;
-    } else if (isNarrowStore(MI)) {
+    } else if (isPromotableZeroStoreInst(MI)) {
       ++NumZeroStoresPromoted;
     }
     // Keeping the iterator straight is a pain, so we let the merge routine tell
@@ -1765,13 +1778,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
     case AArch64::LDRSHWui:
     case AArch64::STRBBui:
     case AArch64::STRHHui:
+    case AArch64::STRWui:
     // Unscaled instructions.
     case AArch64::LDURBBi:
     case AArch64::LDURHHi:
     case AArch64::LDURSBWi:
     case AArch64::LDURSHWi:
     case AArch64::STURBBi:
-    case AArch64::STURHHi: {
+    case AArch64::STURHHi:
+    case AArch64::STURWi: {
       if (tryToMergeLdStInst(MBBI)) {
         Modified = true;
         break;
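
Taken together, the new cases in this switch let optimizeBlock consider 32-bit zero stores as merge candidates, so zeroing two adjacent ints can become a single 64-bit store. An illustrative source-level example of code that should now benefit (actual codegen depends on layout, alignment, and the surrounding block):

// With STRWui/STURWi handled, the two 32-bit zero stores below are candidates
// for merging into one 'str xzr, [x0]' when their offsets are adjacent.
struct Pair { int A, B; };

void zeroPair(Pair *P) {
  P->A = 0; // str wzr, [x0]
  P->B = 0; // str wzr, [x0, #4]  -> merged: str xzr, [x0]
}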