Skip to content

Commit 318d6ba

Browse files
committed
Peephole optimization for ABS on ARM.
Patch by Ana Pazos! llvm-svn: 141365
1 parent e19661e commit 318d6ba

File tree

6 files changed

+174
-7
lines changed

6 files changed

+174
-7
lines changed

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
4747
cl::desc("Check fp vmla / vmls hazard at isel time"),
4848
cl::init(true));
4949

50+
static cl::opt<bool>
51+
DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
52+
cl::desc("Enable / disable ARM integer abs transform"),
53+
cl::init(false));
54+
5055
//===--------------------------------------------------------------------===//
5156
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
5257
/// instructions for SelectionDAG operations.
@@ -252,6 +257,9 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
252257
ARMCC::CondCodes CCVal, SDValue CCR,
253258
SDValue InFlag);
254259

260+
// Select special operations if node forms integer ABS pattern
261+
SDNode *SelectABSOp(SDNode *N);
262+
255263
SDNode *SelectConcatVector(SDNode *N);
256264

257265
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
@@ -2295,6 +2303,53 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
22952303
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
22962304
}
22972305

2306+
/// Target-specific instruction selection for ISD::XOR nodes that form an
/// integer ABS pattern.
2307+
/// Target-independent combining lowers SELECT_CC nodes of the form
2308+
/// select_cc setg[te] X, 0, X, -X
2309+
/// select_cc setgt X, -1, X, -X
2310+
/// select_cc setl[te] X, 0, -X, X
2311+
/// select_cc setlt X, 1, -X, X
2312+
/// which represent Integer ABS into:
2313+
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2314+
/// ARM instruction selection detects the latter and matches it to
2315+
/// ARM::ABS or ARM::t2ABS machine node.
2316+
SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2317+
SDValue XORSrc0 = N->getOperand(0);
2318+
SDValue XORSrc1 = N->getOperand(1);
2319+
DebugLoc DL = N->getDebugLoc();
2320+
EVT VT = N->getValueType(0);
2321+
2322+
if (DisableARMIntABS)
2323+
return NULL;
2324+
2325+
if (XORSrc0.getOpcode() != ISD::ADD ||
2326+
XORSrc1.getOpcode() != ISD::SRA)
2327+
return NULL;
2328+
2329+
SDValue ADDSrc0 = XORSrc0.getOperand(0);
2330+
SDValue ADDSrc1 = XORSrc0.getOperand(1);
2331+
SDValue SRASrc0 = XORSrc1.getOperand(0);
2332+
SDValue SRASrc1 = XORSrc1.getOperand(1);
2333+
ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2334+
EVT XType = SRASrc0.getValueType();
2335+
unsigned Size = XType.getSizeInBits() - 1;
2336+
2337+
if (ADDSrc1 == XORSrc1 &&
2338+
ADDSrc0 == SRASrc0 &&
2339+
XType.isInteger() &&
2340+
SRAConstant != NULL &&
2341+
Size == SRAConstant->getZExtValue()) {
2342+
2343+
unsigned Opcode = ARM::ABS;
2344+
if (Subtarget->isThumb2())
2345+
Opcode = ARM::t2ABS;
2346+
2347+
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2348+
}
2349+
2350+
return NULL;
2351+
}
2352+
22982353
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
22992354
// The only time a CONCAT_VECTORS operation can have legal types is when
23002355
// two 64-bit vectors are concatenated to a 128-bit vector.
@@ -2331,6 +2386,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
23312386

23322387
switch (N->getOpcode()) {
23332388
default: break;
2389+
case ISD::XOR: {
2390+
// Select special operations if XOR node forms integer ABS pattern
2391+
SDNode *ResNode = SelectABSOp(N);
2392+
if (ResNode)
2393+
return ResNode;
2394+
// Other cases are autogenerated.
2395+
break;
2396+
}
23342397
case ISD::Constant: {
23352398
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
23362399
bool UseCP = true;

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6100,6 +6100,86 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
61006100
MI->eraseFromParent(); // The pseudo instruction is gone now.
61016101
return BB;
61026102
}
6103+
6104+
case ARM::ABS:
6105+
case ARM::t2ABS: {
6106+
// To insert an ABS instruction, we have to insert the
6107+
// diamond control-flow pattern. The incoming instruction knows the
6108+
// source vreg to test against 0, the destination vreg to set,
6109+
// the condition code register to branch on, the
6110+
// true/false values to select between, and a branch opcode to use.
6111+
// It transforms
6112+
// V1 = ABS V0
6113+
// into
6114+
// V2 = MOVS V0
6115+
// BCC (branch to SinkBB if V0 >= 0)
6116+
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
6117+
// SinkBB: V1 = PHI(V2, V3)
6118+
const BasicBlock *LLVM_BB = BB->getBasicBlock();
6119+
MachineFunction::iterator BBI = BB;
6120+
++BBI;
6121+
MachineFunction *Fn = BB->getParent();
6122+
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
6123+
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
6124+
Fn->insert(BBI, RSBBB);
6125+
Fn->insert(BBI, SinkBB);
6126+
6127+
unsigned int ABSSrcReg = MI->getOperand(1).getReg();
6128+
unsigned int ABSDstReg = MI->getOperand(0).getReg();
6129+
bool isThumb2 = Subtarget->isThumb2();
6130+
MachineRegisterInfo &MRI = Fn->getRegInfo();
6131+
// In Thumb mode S must not be specified if source register is the SP or
6132+
// PC and if destination register is the SP, so restrict register class
6133+
unsigned NewMovDstReg = MRI.createVirtualRegister(
6134+
isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
6135+
unsigned NewRsbDstReg = MRI.createVirtualRegister(
6136+
isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
6137+
6138+
// Transfer the remainder of BB and its successor edges to SinkBB.
6139+
SinkBB->splice(SinkBB->begin(), BB,
6140+
llvm::next(MachineBasicBlock::iterator(MI)),
6141+
BB->end());
6142+
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6143+
6144+
BB->addSuccessor(RSBBB);
6145+
BB->addSuccessor(SinkBB);
6146+
6147+
// fall through to SinkBB
6148+
RSBBB->addSuccessor(SinkBB);
6149+
6150+
// insert a movs at the end of BB
6151+
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
6152+
NewMovDstReg)
6153+
.addReg(ABSSrcReg, RegState::Kill)
6154+
.addImm((unsigned)ARMCC::AL).addReg(0)
6155+
.addReg(ARM::CPSR, RegState::Define);
6156+
6157+
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
6158+
BuildMI(BB, dl,
6159+
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
6160+
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
6161+
6162+
// insert rsbri in RSBBB
6163+
// Note: BCC and rsbri will be converted into predicated rsbmi
6164+
// by if-conversion pass
6165+
BuildMI(*RSBBB, RSBBB->begin(), dl,
6166+
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
6167+
.addReg(NewMovDstReg, RegState::Kill)
6168+
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
6169+
6170+
// insert PHI in SinkBB,
6171+
// reuse ABSDstReg to not change uses of ABS instruction
6172+
BuildMI(*SinkBB, SinkBB->begin(), dl,
6173+
TII->get(ARM::PHI), ABSDstReg)
6174+
.addReg(NewRsbDstReg).addMBB(RSBBB)
6175+
.addReg(NewMovDstReg).addMBB(BB);
6176+
6177+
// remove ABS instruction
6178+
MI->eraseFromParent();
6179+
6180+
// return last added BB
6181+
return SinkBB;
6182+
}
61036183
}
61046184
}
61056185

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2848,6 +2848,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
28482848
let Inst{15-12} = Rd;
28492849
}
28502850

2851+
def : ARMInstAlias<"movs${p} $Rd, $Rm",
2852+
(MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
2853+
28512854
// A version for the smaller set of tail call registers.
28522855
let neverHasSideEffects = 1 in
28532856
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -4025,6 +4028,14 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
40254028
let Inst{3-0} = opt;
40264029
}
40274030

4031+
// Pseudo instruction that combines movs + predicated rsbmi
4032+
// to implement integer ABS
4033+
let usesCustomInserter = 1, Defs = [CPSR] in {
4034+
def ABS : ARMPseudoInst<
4035+
(outs GPR:$dst), (ins GPR:$src),
4036+
8, NoItinerary, []>;
4037+
}
4038+
40284039
let usesCustomInserter = 1 in {
40294040
let Defs = [CPSR] in {
40304041
def ATOMIC_LOAD_ADD_I8 : PseudoInst<

llvm/lib/Target/ARM/ARMInstrThumb2.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3433,6 +3433,14 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
34333433
[(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
34343434
imm:$cp))]>,
34353435
Requires<[IsThumb2]>;
3436+
3437+
// Pseudo instruction that combines movs + predicated rsbmi
3438+
// to implement integer ABS
3439+
let usesCustomInserter = 1, Defs = [CPSR] in {
3440+
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
3441+
NoItinerary, []>, Requires<[IsThumb2]>;
3442+
}
3443+
34363444
//===----------------------------------------------------------------------===//
34373445
// Coprocessor load/store -- for disassembly only
34383446
//

llvm/test/CodeGen/ARM/iabs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
22

33
;; Integer absolute value, should produce something as good as: ARM:
4-
;; add r3, r0, r0, asr #31
5-
;; eor r0, r3, r0, asr #31
4+
;; movs r0, r0
5+
;; rsbmi r0, r0, #0
66
;; bx lr
77

88
define i32 @test(i32 %a) {
99
%tmp1neg = sub i32 0, %a
1010
%b = icmp sgt i32 %a, -1
1111
%abs = select i1 %b, i32 %a, i32 %tmp1neg
1212
ret i32 %abs
13-
; CHECK: add r1, r0, r0, asr #31
14-
; CHECK: eor r0, r1, r0, asr #31
13+
; CHECK: movs r0, r0
14+
; CHECK: rsbmi r0, r0, #0
1515
; CHECK: bx lr
1616
}

llvm/test/CodeGen/Thumb/iabs.ll

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@
33

44
;; Integer absolute value, should produce something as good as:
55
;; Thumb:
6-
;; asr r2, r0, #31
7-
;; add r0, r0, r2
8-
;; eor r0, r2
6+
;; movs r0, r0
7+
;; bpl
8+
;; rsb r0, r0, #0 (with optimization, bpl + rsb is if-converted into rsbmi)
99
;; bx lr
1010

1111
define i32 @test(i32 %a) {
1212
%tmp1neg = sub i32 0, %a
1313
%b = icmp sgt i32 %a, -1
1414
%abs = select i1 %b, i32 %a, i32 %tmp1neg
1515
ret i32 %abs
16+
; CHECK: movs r0, r0
17+
; CHECK: bpl
18+
; CHECK: rsb r0, r0, #0
19+
; CHECK: bx lr
1620
}
1721

22+

0 commit comments

Comments
 (0)