
Commit 575b88e
Author: Jun Bum Lim
[AArch64]Extend merging narrow loads into a wider load
This change extends r251438 to handle more narrow load promotions,
including byte type, unscaled, and signed. For example, this change will
convert:

  ldursh w1, [x0, #-2]
  ldurh  w2, [x0, #-4]

into

  ldur w2, [x0, #-4]
  asr  w1, w2, #16
  and  w2, w2, #0xffff

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253577 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: 02857f7
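As a quick sanity check on the example above, here is a minimal little-endian C++ sketch (not part of the commit; variable names are ours) showing why one wide load plus asr/and reproduces the two narrow loads:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // The four bytes starting at address x0 - 4.
  const unsigned char Buf[4] = {0x34, 0x12, 0xFE, 0xFF};

  // Before: ldurh w2, [x0, #-4] ; ldursh w1, [x0, #-2]
  uint16_t Lo;
  int16_t Hi;
  std::memcpy(&Lo, Buf, 2);
  std::memcpy(&Hi, Buf + 2, 2);
  const uint32_t W2Before = Lo; // zero-extended low halfword
  const int32_t W1Before = Hi;  // sign-extended high halfword

  // After: ldur w2, [x0, #-4] ; asr w1, w2, #16 ; and w2, w2, #0xffff
  uint32_t W;
  std::memcpy(&W, Buf, 4);
  // Right-shifting a negative int32_t is arithmetic on mainstream compilers.
  const int32_t W1After = static_cast<int32_t>(W) >> 16;
  const uint32_t W2After = W & 0xffff;

  assert(W1Before == W1After && W2Before == W2After);
  return 0;
}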

2 files changed: +363, -26 lines

lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (107 additions, 26 deletions)
@@ -161,6 +161,9 @@ static bool isUnscaledLdSt(unsigned Opc) {
   case AArch64::LDURXi:
   case AArch64::LDURSWi:
   case AArch64::LDURHHi:
+  case AArch64::LDURBBi:
+  case AArch64::LDURSBWi:
+  case AArch64::LDURSHWi:
     return true;
   }
 }
@@ -169,16 +172,39 @@ static bool isUnscaledLdSt(MachineInstr *MI) {
   return isUnscaledLdSt(MI->getOpcode());
 }
 
+static unsigned getBitExtrOpcode(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected opcode.");
+  case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
+  case AArch64::LDRHHui:
+  case AArch64::LDURHHi:
+    return AArch64::UBFMWri;
+  case AArch64::LDRSBWui:
+  case AArch64::LDURSBWi:
+  case AArch64::LDRSHWui:
+  case AArch64::LDURSHWi:
+    return AArch64::SBFMWri;
+  }
+}
+
 static bool isSmallTypeLdMerge(unsigned Opc) {
   switch (Opc) {
   default:
     return false;
   case AArch64::LDRHHui:
   case AArch64::LDURHHi:
+  case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
+  case AArch64::LDRSHWui:
+  case AArch64::LDURSHWi:
+  case AArch64::LDRSBWui:
+  case AArch64::LDURSBWi:
     return true;
-  // FIXME: Add other instructions (e.g, LDRBBui, LDURSHWi, LDRSHWui, etc.).
   }
 }
+
 static bool isSmallTypeLdMerge(MachineInstr *MI) {
   return isSmallTypeLdMerge(MI->getOpcode());
 }
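A note on the opcode choice (our gloss, not part of the commit): UBFMWri and SBFMWri are the generic W-register bitfield-move instructions behind the ubfx/lsr/uxth and sbfx/asr/sxth aliases, so picking between them is what makes an extracted field zero- or sign-extended. With immr = lsb and imms = lsb + width - 1 (imms >= immr), they behave roughly like this sketch:

#include <cstdint>

// Model of UBFM wD, wS, #lsb, #(lsb + width - 1): zero-extend the field.
uint32_t ubfx32(uint32_t Src, unsigned Lsb, unsigned Width) {
  return (Src >> Lsb) & (Width == 32 ? ~0u : (1u << Width) - 1);
}

// Model of SBFM wD, wS, #lsb, #(lsb + width - 1): sign-extend the field.
int32_t sbfx32(uint32_t Src, unsigned Lsb, unsigned Width) {
  const uint32_t Field = ubfx32(Src, Lsb, Width);
  const uint32_t Sign = 1u << (Width - 1);
  return static_cast<int32_t>((Field ^ Sign) - Sign);
}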
@@ -189,10 +215,15 @@ static int getMemScale(MachineInstr *MI) {
   default:
     llvm_unreachable("Opcode has unknown scale!");
   case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
+  case AArch64::LDRSBWui:
+  case AArch64::LDURSBWi:
   case AArch64::STRBBui:
     return 1;
   case AArch64::LDRHHui:
   case AArch64::LDURHHi:
+  case AArch64::LDRSHWui:
+  case AArch64::LDURSHWi:
   case AArch64::STRHHui:
     return 2;
   case AArch64::LDRSui:
@@ -265,11 +296,21 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
   case AArch64::LDURSi:
   case AArch64::LDRHHui:
   case AArch64::LDURHHi:
+  case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
     return Opc;
   case AArch64::LDRSWui:
     return AArch64::LDRWui;
   case AArch64::LDURSWi:
     return AArch64::LDURWi;
+  case AArch64::LDRSBWui:
+    return AArch64::LDRBBui;
+  case AArch64::LDRSHWui:
+    return AArch64::LDRHHui;
+  case AArch64::LDURSBWi:
+    return AArch64::LDURBBi;
+  case AArch64::LDURSHWi:
+    return AArch64::LDURHHi;
   }
 }
 
@@ -311,9 +352,17 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
   case AArch64::LDURSWi:
     return AArch64::LDPSWi;
   case AArch64::LDRHHui:
+  case AArch64::LDRSHWui:
     return AArch64::LDRWui;
   case AArch64::LDURHHi:
+  case AArch64::LDURSHWi:
     return AArch64::LDURWi;
+  case AArch64::LDRBBui:
+  case AArch64::LDRSBWui:
+    return AArch64::LDRHHui;
+  case AArch64::LDURBBi:
+  case AArch64::LDURSBWi:
+    return AArch64::LDURHHi;
   }
 }
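Worth spelling out (our gloss, not from the commit): for these narrow loads the "pair" opcode is not an LDP; two adjacent narrow loads merge into a single load of twice the width, which is then split back apart by the bitfield extracts built in mergePairedInsns below:

// Illustration of the mapping above (little-endian, scaled forms):
//   ldrb w0, [x2] ; ldrb w1, [x2, #1]  ->  ldrh w0, [x2]  + extracts
//   ldrh w0, [x2] ; ldrh w1, [x2, #2]  ->  ldr  w0, [x2]  + extracts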

@@ -535,16 +584,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
 
   if (isSmallTypeLdMerge(Opc)) {
     // Change the scaled offset from small to large type.
-    if (!IsUnscaled)
+    if (!IsUnscaled) {
+      assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
       OffsetImm /= 2;
+    }
     MachineInstr *RtNewDest = MergeForward ? I : Paired;
     // When merging small (< 32 bit) loads for big-endian targets, the order of
     // the component parts gets swapped.
     if (!Subtarget->isLittleEndian())
       std::swap(RtMI, Rt2MI);
     // Construct the new load instruction.
-    // FIXME: currently we support only halfword unsigned load. We need to
-    // handle byte type, signed, and store instructions as well.
     MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
     NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                        TII->get(NewOpc))
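The new assert is easier to read once you recall that scaled immediates count transfer-size units (our gloss): the merged load is twice as wide, so reaching the same byte offset takes half the immediate, which must therefore be even. For example:

// Scaled halfword loads: the immediate is in halfwords (2 bytes).
//   ldrh w0, [x2, #4]   ; byte offset 4 * 2 = 8
//   ldrh w1, [x2, #5]   ; byte offset 5 * 2 = 10
// Merged word load: the immediate is in words (4 bytes), so OffsetImm /= 2:
//   ldr  w0, [x2, #2]   ; byte offset 2 * 4 = 8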
@@ -564,35 +613,61 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
     DEBUG(dbgs() << "    with instructions:\n    ");
     DEBUG((NewMemMI)->print(dbgs()));
 
+    int Width = getMemScale(I) == 1 ? 8 : 16;
+    int LSBLow = 0;
+    int LSBHigh = Width;
+    int ImmsLow = LSBLow + Width - 1;
+    int ImmsHigh = LSBHigh + Width - 1;
     MachineInstr *ExtDestMI = MergeForward ? Paired : I;
     if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
-      // Create the bitfield extract for high half.
+      // Create the bitfield extract for high bits.
       BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
-                          TII->get(AArch64::UBFMWri))
+                          TII->get(getBitExtrOpcode(Rt2MI)))
                       .addOperand(getLdStRegOp(Rt2MI))
                       .addReg(getLdStRegOp(RtNewDest).getReg())
-                      .addImm(16)
-                      .addImm(31);
-      // Create the bitfield extract for low half.
-      BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
-                          TII->get(AArch64::ANDWri))
-                      .addOperand(getLdStRegOp(RtMI))
-                      .addReg(getLdStRegOp(RtNewDest).getReg())
-                      .addImm(15);
+                      .addImm(LSBHigh)
+                      .addImm(ImmsHigh);
+      // Create the bitfield extract for low bits.
+      if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+        // For unsigned, prefer to use AND for low bits.
+        BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                            TII->get(AArch64::ANDWri))
+                        .addOperand(getLdStRegOp(RtMI))
+                        .addReg(getLdStRegOp(RtNewDest).getReg())
+                        .addImm(ImmsLow);
+      } else {
+        BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                            TII->get(getBitExtrOpcode(RtMI)))
+                        .addOperand(getLdStRegOp(RtMI))
+                        .addReg(getLdStRegOp(RtNewDest).getReg())
+                        .addImm(LSBLow)
+                        .addImm(ImmsLow);
+      }
     } else {
-      // Create the bitfield extract for low half.
-      BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
-                          TII->get(AArch64::ANDWri))
-                      .addOperand(getLdStRegOp(RtMI))
-                      .addReg(getLdStRegOp(RtNewDest).getReg())
-                      .addImm(15);
-      // Create the bitfield extract for high half.
+      // Create the bitfield extract for low bits.
+      if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+        // For unsigned, prefer to use AND for low bits.
+        BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                            TII->get(AArch64::ANDWri))
+                        .addOperand(getLdStRegOp(RtMI))
+                        .addReg(getLdStRegOp(RtNewDest).getReg())
+                        .addImm(ImmsLow);
+      } else {
+        BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                            TII->get(getBitExtrOpcode(RtMI)))
+                        .addOperand(getLdStRegOp(RtMI))
+                        .addReg(getLdStRegOp(RtNewDest).getReg())
+                        .addImm(LSBLow)
+                        .addImm(ImmsLow);
+      }
+
+      // Create the bitfield extract for high bits.
       BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
-                          TII->get(AArch64::UBFMWri))
+                          TII->get(getBitExtrOpcode(Rt2MI)))
                       .addOperand(getLdStRegOp(Rt2MI))
                       .addReg(getLdStRegOp(RtNewDest).getReg())
-                      .addImm(16)
-                      .addImm(31);
+                      .addImm(LSBHigh)
+                      .addImm(ImmsHigh);
     }
     DEBUG(dbgs() << "    ");
     DEBUG((BitExtMI1)->print(dbgs()));
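To make the immediates concrete, a worked example (ours, not in the commit). For ANDWri, .addImm() takes the encoded logical immediate, where encoded values 7 and 15 (with immr = 0) denote the masks 0xff and 0xffff:

// Byte merge (Width = 8):
//   high bits: UBFM/SBFM wD, wS, #8, #15   (aliases ubfx/sbfx wD, wS, #8, #8)
//   low bits : ANDWri, encoded imm 7       (mask 0xff)
// Halfword merge (Width = 16):
//   high bits: UBFM/SBFM wD, wS, #16, #31  (aliases lsr/asr wD, wS, #16)
//   low bits : ANDWri, encoded imm 15      (mask 0xffff)
// The halfword case reproduces the hard-coded 16/31/15 the old code used.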
@@ -1173,7 +1248,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool enableNarrowLdOpt) {
   bool Modified = false;
   // Three tranformations to do here:
-  // 1) Find halfword loads that can be merged into a single 32-bit word load
+  // 1) Find narrow loads that can be converted into a single wider load
   //    with bitfield extract instructions.
   //    e.g.,
   //      ldrh w0, [x2]
@@ -1206,9 +1281,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
       ++MBBI;
       break;
     // Scaled instructions.
+    case AArch64::LDRBBui:
     case AArch64::LDRHHui:
+    case AArch64::LDRSBWui:
+    case AArch64::LDRSHWui:
     // Unscaled instructions.
-    case AArch64::LDURHHi: {
+    case AArch64::LDURBBi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSHWi: {
       if (tryToMergeLdStInst(MBBI)) {
         Modified = true;
         break;
