Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 844cafe

Browse files
author
Jun Bum Lim
committed
[AArch64] Merge two adjacent str WZR into str XZR
Summary: This change merges adjacent 32 bit zero stores into a 64 bit zero store. e.g., str wzr, [x0] str wzr, [x0, #4] becomes str xzr, [x0] Therefore, four adjacent 32 bit zero stores will be a single stp. e.g., str wzr, [x0] str wzr, [x0, #4] str wzr, [x0, #8] str wzr, [x0, #12] becomes stp xzr, xzr, [x0] Reviewers: mcrosier, jmolloy, gberry, t.p.northover Subscribers: aemerson, rengolin, mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D16933 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260682 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 9f34dc1 commit 844cafe

File tree

2 files changed

+105
-15
lines changed

2 files changed

+105
-15
lines changed

lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -235,10 +235,6 @@ static bool isNarrowStore(unsigned Opc) {
235235
}
236236
}
237237

238-
static bool isNarrowStore(MachineInstr *MI) {
239-
return isNarrowStore(MI->getOpcode());
240-
}
241-
242238
static bool isNarrowLoad(unsigned Opc) {
243239
switch (Opc) {
244240
default:
@@ -386,6 +382,10 @@ static unsigned getMatchingWideOpcode(unsigned Opc) {
386382
return AArch64::STURHHi;
387383
case AArch64::STURHHi:
388384
return AArch64::STURWi;
385+
case AArch64::STURWi:
386+
return AArch64::STURXi;
387+
case AArch64::STRWui:
388+
return AArch64::STRXui;
389389
case AArch64::LDRHHui:
390390
case AArch64::LDRSHWui:
391391
return AArch64::LDRWui;
@@ -640,6 +640,16 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
640640
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
641641
}
642642

643+
static bool isPromotableZeroStoreOpcode(MachineInstr *MI) {
644+
unsigned Opc = MI->getOpcode();
645+
return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
646+
}
647+
648+
static bool isPromotableZeroStoreInst(MachineInstr *MI) {
649+
return (isPromotableZeroStoreOpcode(MI)) &&
650+
getLdStRegOp(MI).getReg() == AArch64::WZR;
651+
}
652+
643653
MachineBasicBlock::iterator
644654
AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
645655
MachineBasicBlock::iterator MergeMI,
@@ -775,12 +785,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
775785
MergeMI->eraseFromParent();
776786
return NextI;
777787
}
778-
assert(isNarrowStore(Opc) && "Expected narrow store");
788+
assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
779789

780790
// Construct the new instruction.
781791
MachineInstrBuilder MIB;
782792
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
783-
.addOperand(getLdStRegOp(I))
793+
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
784794
.addOperand(BaseRegOp)
785795
.addImm(OffsetImm)
786796
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
@@ -1211,7 +1221,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
12111221
unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
12121222
int Offset = getLdStOffsetOp(FirstMI).getImm();
12131223
int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
1214-
bool IsNarrowStore = isNarrowStore(Opc);
1224+
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
12151225

12161226
// Track which registers have been modified and used between the first insn
12171227
// (inclusive) and the second insn.
@@ -1282,7 +1292,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
12821292
continue;
12831293
}
12841294

1285-
if (IsNarrowLoad || IsNarrowStore) {
1295+
if (IsNarrowLoad || IsPromotableZeroStore) {
12861296
// If the alignment requirements of the scaled wide load/store
12871297
// instruction can't express the offset of the scaled narrow
12881298
// input, bail and keep looking.
@@ -1307,7 +1317,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
13071317
// For narrow stores, allow only when the stored value is the same
13081318
// (i.e., WZR).
13091319
if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
1310-
(IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
1320+
(IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
13111321
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
13121322
MemInsns.push_back(MI);
13131323
continue;
@@ -1633,24 +1643,27 @@ bool AArch64LoadStoreOpt::isCandidateToMergeOrPair(MachineInstr *MI) {
16331643
// store.
16341644
bool AArch64LoadStoreOpt::tryToMergeLdStInst(
16351645
MachineBasicBlock::iterator &MBBI) {
1636-
assert((isNarrowLoad(MBBI) || isNarrowStore(MBBI)) && "Expected narrow op.");
1646+
assert((isNarrowLoad(MBBI) || isPromotableZeroStoreOpcode(MBBI)) &&
1647+
"Expected narrow op.");
16371648
MachineInstr *MI = MBBI;
16381649
MachineBasicBlock::iterator E = MI->getParent()->end();
16391650

16401651
if (!isCandidateToMergeOrPair(MI))
16411652
return false;
16421653

1643-
// For narrow stores, find only the case where the stored value is WZR.
1644-
if (isNarrowStore(MI) && getLdStRegOp(MI).getReg() != AArch64::WZR)
1654+
// For promotable zero stores, the stored value should be WZR.
1655+
if (isPromotableZeroStoreOpcode(MI) &&
1656+
getLdStRegOp(MI).getReg() != AArch64::WZR)
16451657
return false;
16461658

16471659
// Look ahead up to LdStLimit instructions for a mergable instruction.
16481660
LdStPairFlags Flags;
1649-
MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit);
1661+
MachineBasicBlock::iterator MergeMI =
1662+
findMatchingInsn(MBBI, Flags, LdStLimit);
16501663
if (MergeMI != E) {
16511664
if (isNarrowLoad(MI)) {
16521665
++NumNarrowLoadsPromoted;
1653-
} else if (isNarrowStore(MI)) {
1666+
} else if (isPromotableZeroStoreInst(MI)) {
16541667
++NumZeroStoresPromoted;
16551668
}
16561669
// Keeping the iterator straight is a pain, so we let the merge routine tell
@@ -1765,13 +1778,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
17651778
case AArch64::LDRSHWui:
17661779
case AArch64::STRBBui:
17671780
case AArch64::STRHHui:
1781+
case AArch64::STRWui:
17681782
// Unscaled instructions.
17691783
case AArch64::LDURBBi:
17701784
case AArch64::LDURHHi:
17711785
case AArch64::LDURSBWi:
17721786
case AArch64::LDURSHWi:
17731787
case AArch64::STURBBi:
1774-
case AArch64::STURHHi: {
1788+
case AArch64::STURHHi:
1789+
case AArch64::STURWi: {
17751790
if (tryToMergeLdStInst(MBBI)) {
17761791
Modified = true;
17771792
break;

test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,42 @@ entry:
352352
ret void
353353
}
354354

355+
;CHECK-LABEL: Strw_zero
356+
;CHECK: str xzr
357+
define void @Strw_zero(i32* nocapture %P, i32 %n) {
358+
entry:
359+
%idxprom = sext i32 %n to i64
360+
%arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
361+
store i32 0, i32* %arrayidx
362+
%add = add nsw i32 %n, 1
363+
%idxprom1 = sext i32 %add to i64
364+
%arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
365+
store i32 0, i32* %arrayidx2
366+
ret void
367+
}
368+
369+
;CHECK-LABEL: Strw_zero_4
370+
;CHECK: stp xzr
371+
define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
372+
entry:
373+
%idxprom = sext i32 %n to i64
374+
%arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
375+
store i32 0, i32* %arrayidx
376+
%add = add nsw i32 %n, 1
377+
%idxprom1 = sext i32 %add to i64
378+
%arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
379+
store i32 0, i32* %arrayidx2
380+
%add3 = add nsw i32 %n, 2
381+
%idxprom4 = sext i32 %add3 to i64
382+
%arrayidx5 = getelementptr inbounds i32, i32* %P, i64 %idxprom4
383+
store i32 0, i32* %arrayidx5
384+
%add6 = add nsw i32 %n, 3
385+
%idxprom7 = sext i32 %add6 to i64
386+
%arrayidx8 = getelementptr inbounds i32, i32* %P, i64 %idxprom7
387+
store i32 0, i32* %arrayidx8
388+
ret void
389+
}
390+
355391
; CHECK-LABEL: Sturb_zero
356392
; CHECK: sturh wzr
357393
define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
@@ -404,3 +440,42 @@ entry:
404440
store i16 0, i16* %arrayidx9
405441
ret void
406442
}
443+
444+
;CHECK-LABEL: Sturw_zero
445+
;CHECK: stur xzr
446+
define void @Sturw_zero(i32* nocapture %P, i32 %n) {
447+
entry:
448+
%sub = add nsw i32 %n, -3
449+
%idxprom = sext i32 %sub to i64
450+
%arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
451+
store i32 0, i32* %arrayidx
452+
%sub1 = add nsw i32 %n, -4
453+
%idxprom2 = sext i32 %sub1 to i64
454+
%arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
455+
store i32 0, i32* %arrayidx3
456+
ret void
457+
}
458+
459+
;CHECK-LABEL: Sturw_zero_4
460+
;CHECK: str xzr
461+
define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
462+
entry:
463+
%sub = add nsw i32 %n, -3
464+
%idxprom = sext i32 %sub to i64
465+
%arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
466+
store i32 0, i32* %arrayidx
467+
%sub1 = add nsw i32 %n, -4
468+
%idxprom2 = sext i32 %sub1 to i64
469+
%arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
470+
store i32 0, i32* %arrayidx3
471+
%sub4 = add nsw i32 %n, -2
472+
%idxprom5 = sext i32 %sub4 to i64
473+
%arrayidx6 = getelementptr inbounds i32, i32* %P, i64 %idxprom5
474+
store i32 0, i32* %arrayidx6
475+
%sub7 = add nsw i32 %n, -1
476+
%idxprom8 = sext i32 %sub7 to i64
477+
%arrayidx9 = getelementptr inbounds i32, i32* %P, i64 %idxprom8
478+
store i32 0, i32* %arrayidx9
479+
ret void
480+
}
481+

0 commit comments

Comments
 (0)