Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 87f50aa

Browse files
author
James Molloy
committed
[ARM] Shrink post-indexed LDR and STR to LDM/STM
A Thumb-2 post-indexed LDR instruction such as `ldr.w r0, [r1], #4` can be rewritten as `ldm.n r1!, {r0}`. LDMs can be more expensive than LDRs on some cores, so this has been enabled only in minsize mode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272002 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent d5127f4 commit 87f50aa

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

lib/Target/ARM/Thumb2SizeReduction.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,14 @@ namespace {
116116
{ ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
117117
{ ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
118118
{ ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
119+
{ ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
119120
{ ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
120121
{ ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
121122
{ ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
122123
{ ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
123124
{ ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
124125
{ ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
126+
{ ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
125127

126128
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
127129
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
@@ -423,6 +425,46 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
423425
HasShift = true;
424426
OpNum = 4;
425427
break;
428+
case ARM::t2LDR_POST:
429+
case ARM::t2STR_POST: {
430+
if (!MBB.getParent()->getFunction()->optForMinSize())
431+
return false;
432+
433+
// We're creating a completely different type of load/store - LDM from LDR.
434+
// For this reason we can't reuse the logic at the end of this function; we
435+
// have to implement the MI building here.
436+
bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
437+
unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
438+
unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
439+
unsigned Offset = MI->getOperand(3).getImm();
440+
unsigned PredImm = MI->getOperand(4).getImm();
441+
unsigned PredReg = MI->getOperand(5).getReg();
442+
assert(isARMLowRegister(Rt));
443+
assert(isARMLowRegister(Rn));
444+
445+
if (Offset != 4)
446+
return false;
447+
448+
// Add the 16-bit load / store instruction.
449+
DebugLoc dl = MI->getDebugLoc();
450+
auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
451+
.addReg(Rn, RegState::Define)
452+
.addReg(Rn)
453+
.addImm(PredImm)
454+
.addReg(PredReg)
455+
.addReg(Rt, IsStore ? 0 : RegState::Define);
456+
457+
// Transfer memoperands.
458+
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
459+
460+
// Transfer MI flags.
461+
MIB.setMIFlags(MI->getFlags());
462+
463+
// Kill the old instruction.
464+
MI->eraseFromParent();
465+
++NumLdSts;
466+
return true;
467+
}
426468
case ARM::t2LDMIA: {
427469
unsigned BaseReg = MI->getOperand(0).getReg();
428470
assert(isARMLowRegister(BaseReg));

test/CodeGen/ARM/t2-shrink-ldrpost.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; RUN: llc < %s | FileCheck %s
2+
3+
target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
4+
target triple = "thumbv7m--linux-gnu"
5+
6+
; CHECK-LABEL: f:
7+
; CHECK: ldm r{{[0-9]}}!, {r[[x:[0-9]]]}
8+
; CHECK: add.w r[[x]], r[[x]], #3
9+
; CHECK: stm r{{[0-9]}}!, {r[[x]]}
10+
; Positive test: the function carries the minsize attribute. The loop loads an
; i32 through the %b-derived pointer, adds 3, stores it through the %a-derived
; pointer, and advances both pointers by one element per iteration. The CHECK
; lines for f expect the load and store to be emitted as ldm/stm with base
; writeback.
define void @f(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize minsize {
11+
%1 = icmp sgt i32 %n, 0
12+
br i1 %1, label %.lr.ph, label %._crit_edge
13+
14+
.lr.ph: ; preds = %.lr.ph, %0
15+
%i.04 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ]
16+
%.03 = phi i32* [ %2, %.lr.ph ], [ %b, %0 ]
17+
%.012 = phi i32* [ %5, %.lr.ph ], [ %a, %0 ]
18+
%2 = getelementptr inbounds i32, i32* %.03, i32 1
19+
%3 = load i32, i32* %.03, align 4
20+
%4 = add nsw i32 %3, 3
21+
%5 = getelementptr inbounds i32, i32* %.012, i32 1
22+
store i32 %4, i32* %.012, align 4
23+
%6 = add nsw i32 %i.04, 1
24+
%exitcond = icmp eq i32 %6, %n
25+
br i1 %exitcond, label %._crit_edge, label %.lr.ph
26+
27+
._crit_edge: ; preds = %.lr.ph, %0
28+
ret void
29+
}
30+
31+
; CHECK-LABEL: f_nominsize:
32+
; CHECK-NOT: ldm
33+
; Negative test: same loop body as @f, but the function is only optsize (no
; minsize attribute). The CHECK-NOT line for f_nominsize expects that no ldm is
; emitted here.
define void @f_nominsize(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize {
34+
%1 = icmp sgt i32 %n, 0
35+
br i1 %1, label %.lr.ph, label %._crit_edge
36+
37+
.lr.ph: ; preds = %.lr.ph, %0
38+
%i.04 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ]
39+
%.03 = phi i32* [ %2, %.lr.ph ], [ %b, %0 ]
40+
%.012 = phi i32* [ %5, %.lr.ph ], [ %a, %0 ]
41+
%2 = getelementptr inbounds i32, i32* %.03, i32 1
42+
%3 = load i32, i32* %.03, align 4
43+
%4 = add nsw i32 %3, 3
44+
%5 = getelementptr inbounds i32, i32* %.012, i32 1
45+
store i32 %4, i32* %.012, align 4
46+
%6 = add nsw i32 %i.04, 1
47+
%exitcond = icmp eq i32 %6, %n
48+
br i1 %exitcond, label %._crit_edge, label %.lr.ph
49+
50+
._crit_edge: ; preds = %.lr.ph, %0
51+
ret void
52+
}

0 commit comments

Comments
 (0)