@@ -99,7 +99,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
99
99
// Return the matching instruction if one is found, else MBB->end().
100
100
MachineBasicBlock::iterator findMatchingInsn (MachineBasicBlock::iterator I,
101
101
LdStPairFlags &Flags,
102
- unsigned Limit);
102
+ unsigned Limit,
103
+ bool FindNarrowMerge);
103
104
104
105
// Scan the instructions looking for a store that writes to the address from
105
106
// which the current load instruction reads. Return true if one is found.
@@ -757,7 +758,8 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
757
758
MergeMI->eraseFromParent ();
758
759
return NextI;
759
760
}
760
- assert (isPromotableZeroStoreInst (I) && " Expected promotable zero store" );
761
+ assert (isPromotableZeroStoreInst (I) && isPromotableZeroStoreInst (MergeMI) &&
762
+ " Expected promotable zero store" );
761
763
762
764
// Construct the new instruction.
763
765
MachineInstrBuilder MIB;
@@ -1181,7 +1183,8 @@ static bool canMergeOpc(unsigned OpcA, unsigned OpcB, LdStPairFlags &Flags,
1181
1183
/// current instruction into a wider equivalent or a load/store pair.
1182
1184
MachineBasicBlock::iterator
1183
1185
AArch64LoadStoreOpt::findMatchingInsn (MachineBasicBlock::iterator I,
1184
- LdStPairFlags &Flags, unsigned Limit) {
1186
+ LdStPairFlags &Flags, unsigned Limit,
1187
+ bool FindNarrowMerge) {
1185
1188
MachineBasicBlock::iterator E = I->getParent ()->end ();
1186
1189
MachineBasicBlock::iterator MBBI = I;
1187
1190
MachineInstr *FirstMI = I;
@@ -1255,26 +1258,26 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1255
1258
// safely transform. Similarly, stop if we see a hint to avoid pairs.
1256
1259
if (MI->hasOrderedMemoryRef () || TII->isLdStPairSuppressed (MI))
1257
1260
return E;
1258
- // If the resultant immediate offset of merging these instructions
1259
- // is out of range for a pairwise instruction, bail and keep looking.
1260
- bool IsNarrowLoad = isNarrowLoad (MI->getOpcode ());
1261
- if (!IsNarrowLoad &&
1262
- !inBoundsForPair (IsUnscaled, MinOffset, OffsetStride)) {
1263
- trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
1264
- MemInsns.push_back (MI);
1265
- continue ;
1266
- }
1267
1261
1268
- if (IsNarrowLoad || IsPromotableZeroStore ) {
1262
+ if (FindNarrowMerge ) {
1269
1263
// If the alignment requirements of the scaled wide load/store
1270
- // instruction can't express the offset of the scaled narrow
1271
- // input, bail and keep looking.
1272
- if (!IsUnscaled && alignTo (MinOffset, 2 ) != MinOffset) {
1264
+ // instruction can't express the offset of the scaled narrow input,
1265
+ // bail and keep looking. For promotable zero stores, allow only when
1266
+ // the stored value is the same (i.e., WZR).
1267
+ if ((!IsUnscaled && alignTo (MinOffset, 2 ) != MinOffset) ||
1268
+ (IsPromotableZeroStore && Reg != getLdStRegOp (MI).getReg ())) {
1273
1269
trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
1274
1270
MemInsns.push_back (MI);
1275
1271
continue ;
1276
1272
}
1277
1273
} else {
1274
+ // If the resultant immediate offset of merging these instructions
1275
+ // is out of range for a pairwise instruction, bail and keep looking.
1276
+ if (!inBoundsForPair (IsUnscaled, MinOffset, OffsetStride)) {
1277
+ trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
1278
+ MemInsns.push_back (MI);
1279
+ continue ;
1280
+ }
1278
1281
// If the alignment requirements of the paired (scaled) instruction
1279
1282
// can't express the offset of the unscaled input, bail and keep
1280
1283
// looking.
@@ -1287,10 +1290,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1287
1290
// If the destination register of the loads is the same register, bail
1288
1291
// and keep looking. A load-pair instruction with both destination
1289
1292
// registers the same is UNPREDICTABLE and will result in an exception.
1290
- // For narrow stores, allow only when the stored value is the same
1291
- // (i.e., WZR).
1292
- if ((MayLoad && Reg == getLdStRegOp (MI).getReg ()) ||
1293
- (IsPromotableZeroStore && Reg != getLdStRegOp (MI).getReg ())) {
1293
+ if (MayLoad && Reg == getLdStRegOp (MI).getReg ()) {
1294
1294
trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
1295
1295
MemInsns.push_back (MI);
1296
1296
continue ;
@@ -1609,7 +1609,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
1609
1609
// Look ahead up to LdStLimit instructions for a mergeable instruction.
1610
1610
LdStPairFlags Flags;
1611
1611
MachineBasicBlock::iterator MergeMI =
1612
- findMatchingInsn (MBBI, Flags, LdStLimit);
1612
+ findMatchingInsn (MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true );
1613
1613
if (MergeMI != E) {
1614
1614
if (isNarrowLoad (MI)) {
1615
1615
++NumNarrowLoadsPromoted;
@@ -1644,7 +1644,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
1644
1644
1645
1645
// Look ahead up to LdStLimit instructions for a pairable instruction.
1646
1646
LdStPairFlags Flags;
1647
- MachineBasicBlock::iterator Paired = findMatchingInsn (MBBI, Flags, LdStLimit);
1647
+ MachineBasicBlock::iterator Paired =
1648
+ findMatchingInsn (MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false );
1648
1649
if (Paired != E) {
1649
1650
++NumPairCreated;
1650
1651
if (TII->isUnscaledLdSt (MI))
0 commit comments