Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 7a8b700

Browse files
author
Jun Bum Lim
committed
[AArch64] Handle missing store pair opportunity
Summary: This change handles a missed store-pair opportunity where the first store instruction stores zero and is followed by a store of a non-zero value. For example, this change will convert:
  str wzr, [x8]
  str w1, [x8, #4]
into:
  stp wzr, w1, [x8]
Reviewers: jmolloy, t.p.northover, mcrosier
Subscribers: flyingforyou, aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D18570
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265021 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ca50bf5 commit 7a8b700

File tree

2 files changed

+45
-30
lines changed

2 files changed

+45
-30
lines changed

lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
9999
// Return the matching instruction if one is found, else MBB->end().
100100
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
101101
LdStPairFlags &Flags,
102-
unsigned Limit);
102+
unsigned Limit,
103+
bool FindNarrowMerge);
103104

104105
// Scan the instructions looking for a store that writes to the address from
105106
// which the current load instruction reads. Return true if one is found.
@@ -757,7 +758,8 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
757758
MergeMI->eraseFromParent();
758759
return NextI;
759760
}
760-
assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
761+
assert(isPromotableZeroStoreInst(I) && isPromotableZeroStoreInst(MergeMI) &&
762+
"Expected promotable zero store");
761763

762764
// Construct the new instruction.
763765
MachineInstrBuilder MIB;
@@ -1181,7 +1183,8 @@ static bool canMergeOpc(unsigned OpcA, unsigned OpcB, LdStPairFlags &Flags,
11811183
/// current instruction into a wider equivalent or a load/store pair.
11821184
MachineBasicBlock::iterator
11831185
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1184-
LdStPairFlags &Flags, unsigned Limit) {
1186+
LdStPairFlags &Flags, unsigned Limit,
1187+
bool FindNarrowMerge) {
11851188
MachineBasicBlock::iterator E = I->getParent()->end();
11861189
MachineBasicBlock::iterator MBBI = I;
11871190
MachineInstr *FirstMI = I;
@@ -1255,26 +1258,26 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
12551258
// safely transform. Similarly, stop if we see a hint to avoid pairs.
12561259
if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
12571260
return E;
1258-
// If the resultant immediate offset of merging these instructions
1259-
// is out of range for a pairwise instruction, bail and keep looking.
1260-
bool IsNarrowLoad = isNarrowLoad(MI->getOpcode());
1261-
if (!IsNarrowLoad &&
1262-
!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1263-
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1264-
MemInsns.push_back(MI);
1265-
continue;
1266-
}
12671261

1268-
if (IsNarrowLoad || IsPromotableZeroStore) {
1262+
if (FindNarrowMerge) {
12691263
// If the alignment requirements of the scaled wide load/store
1270-
// instruction can't express the offset of the scaled narrow
1271-
// input, bail and keep looking.
1272-
if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
1264+
// instruction can't express the offset of the scaled narrow input,
1265+
// bail and keep looking. For promotable zero stores, allow only when
1266+
// the stored value is the same (i.e., WZR).
1267+
if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1268+
(IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
12731269
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
12741270
MemInsns.push_back(MI);
12751271
continue;
12761272
}
12771273
} else {
1274+
// If the resultant immediate offset of merging these instructions
1275+
// is out of range for a pairwise instruction, bail and keep looking.
1276+
if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1277+
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1278+
MemInsns.push_back(MI);
1279+
continue;
1280+
}
12781281
// If the alignment requirements of the paired (scaled) instruction
12791282
// can't express the offset of the unscaled input, bail and keep
12801283
// looking.
@@ -1287,10 +1290,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
12871290
// If the destination register of the loads is the same register, bail
12881291
// and keep looking. A load-pair instruction with both destination
12891292
// registers the same is UNPREDICTABLE and will result in an exception.
1290-
// For narrow stores, allow only when the stored value is the same
1291-
// (i.e., WZR).
1292-
if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
1293-
(IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1293+
if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
12941294
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
12951295
MemInsns.push_back(MI);
12961296
continue;
@@ -1609,7 +1609,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
16091609
// Look ahead up to LdStLimit instructions for a mergable instruction.
16101610
LdStPairFlags Flags;
16111611
MachineBasicBlock::iterator MergeMI =
1612-
findMatchingInsn(MBBI, Flags, LdStLimit);
1612+
findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
16131613
if (MergeMI != E) {
16141614
if (isNarrowLoad(MI)) {
16151615
++NumNarrowLoadsPromoted;
@@ -1644,7 +1644,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
16441644

16451645
// Look ahead up to LdStLimit instructions for a pairable instruction.
16461646
LdStPairFlags Flags;
1647-
MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, LdStLimit);
1647+
MachineBasicBlock::iterator Paired =
1648+
findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
16481649
if (Paired != E) {
16491650
++NumPairCreated;
16501651
if (TII->isUnscaledLdSt(MI))

test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -353,8 +353,8 @@ entry:
353353
ret void
354354
}
355355

356-
;CHECK-LABEL: Strw_zero
357-
;CHECK : str xzr
356+
; CHECK-LABEL: Strw_zero
357+
; CHECK: str xzr
358358
define void @Strw_zero(i32* nocapture %P, i32 %n) {
359359
entry:
360360
%idxprom = sext i32 %n to i64
@@ -367,8 +367,22 @@ entry:
367367
ret void
368368
}
369369

370-
;CHECK-LABEL: Strw_zero_4
371-
;CHECK : stp xzr
370+
; CHECK-LABEL: Strw_zero_nonzero
371+
; CHECK: stp wzr, w1
372+
define void @Strw_zero_nonzero(i32* nocapture %P, i32 %n) {
373+
entry:
374+
%idxprom = sext i32 %n to i64
375+
%arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
376+
store i32 0, i32* %arrayidx
377+
%add = add nsw i32 %n, 1
378+
%idxprom1 = sext i32 %add to i64
379+
%arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
380+
store i32 %n, i32* %arrayidx2
381+
ret void
382+
}
383+
384+
; CHECK-LABEL: Strw_zero_4
385+
; CHECK: stp xzr
372386
define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
373387
entry:
374388
%idxprom = sext i32 %n to i64
@@ -442,8 +456,8 @@ entry:
442456
ret void
443457
}
444458

445-
;CHECK-LABEL: Sturw_zero
446-
;CHECK : stur xzr
459+
; CHECK-LABEL: Sturw_zero
460+
; CHECK: stur xzr
447461
define void @Sturw_zero(i32* nocapture %P, i32 %n) {
448462
entry:
449463
%sub = add nsw i32 %n, -3
@@ -457,8 +471,8 @@ entry:
457471
ret void
458472
}
459473

460-
;CHECK-LABEL: Sturw_zero_4
461-
;CHECK : str xzr
474+
; CHECK-LABEL: Sturw_zero_4
475+
; CHECK: stp xzr, xzr
462476
define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
463477
entry:
464478
%sub = add nsw i32 %n, -3

0 commit comments

Comments
 (0)