Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 172d6c0

Browse files
committed
[ARM]: Assign cost of scaling used in addressing mode for ARM cores
This patch assigns a cost to the scaling used in addressing modes. On many ARM cores, a negated register offset takes longer than a non-negated register offset in a register-offset addressing mode. For instance: (1) LDR R0, [R1, R2 LSL #2] (2) LDR R0, [R1, -R2 LSL #2] Above, (1) takes fewer cycles than (2). By assigning an appropriate scaling factor cost, we enable LLVM to make the right trade-offs in the optimization and code-selection phases. Differential Revision: http://reviews.llvm.org/D24857 Reviewers: jmolloy, rengolin git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284127 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a913b4a commit 172d6c0

File tree

5 files changed

+35
-2
lines changed

5 files changed

+35
-2
lines changed

lib/Target/ARM/ARM.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
9999
// Not to be confused with FeatureHasRetAddrStack (return address stack)
100100
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
101101
"Enable Reliability, Availability and Serviceability extensions">;
102+
def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
103+
"Enable fast computation of positive address offsets">;
102104

103105

104106
// Cyclone has preferred instructions for zeroing VFP registers, which can
@@ -773,13 +775,15 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
773775
FeatureHWDiv,
774776
FeatureHWDivARM,
775777
FeatureCrypto,
776-
FeatureCRC]>;
778+
FeatureCRC,
779+
FeatureFPAO]>;
777780

778781
def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
779782
FeatureHWDiv,
780783
FeatureHWDivARM,
781784
FeatureCrypto,
782-
FeatureCRC]>;
785+
FeatureCRC,
786+
FeatureFPAO]>;
783787

784788
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
785789
FeatureHWDiv,

lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11612,6 +11612,17 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
1161211612
return true;
1161311613
}
1161411614

11615+
int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
11616+
const AddrMode &AM, Type *Ty,
11617+
unsigned AS) const {
11618+
if (isLegalAddressingMode(DL, AM, Ty, AS)) {
11619+
if (Subtarget->hasFPAO())
11620+
return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
11621+
return 0;
11622+
}
11623+
return -1;
11624+
}
11625+
1161511626

1161611627
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
1161711628
if (V < 0)

lib/Target/ARM/ARMISelLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,14 @@ namespace llvm {
291291
/// by AM is legal for this target, for a load/store of the specified type.
292292
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
293293
Type *Ty, unsigned AS) const override;
294+
295+
/// getScalingFactorCost - Return the cost of the scaling used in
296+
/// addressing mode represented by AM.
297+
/// If the AM is supported, the return value must be >= 0.
298+
/// If the AM is not supported, the return value must be negative.
299+
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
300+
unsigned AS) const override;
301+
294302
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
295303

296304
/// isLegalICmpImmediate - Return true if the specified immediate is legal

lib/Target/ARM/ARMSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
235235
/// particularly effective at zeroing a VFP register.
236236
bool HasZeroCycleZeroing = false;
237237

238+
/// HasFPAO - if true, the processor computes positive address offsets faster than negated ones
239+
bool HasFPAO = false;
240+
238241
/// If true, if conversion may decide to leave some instructions unpredicated.
239242
bool IsProfitableToUnpredicate = false;
240243

@@ -453,6 +456,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
453456
bool hasTrustZone() const { return HasTrustZone; }
454457
bool has8MSecExt() const { return Has8MSecExt; }
455458
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
459+
bool hasFPAO() const { return HasFPAO; }
456460
bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
457461
bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
458462
bool hasSlowVDUP32() const { return HasSlowVDUP32; }

test/CodeGen/ARM/lsr-scale-addr-mode.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
22
; Should use scaled addressing mode.
33

4+
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a53 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A53
5+
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a57 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A57
6+
; Should not generate negated register offset
7+
48
define void @sintzero(i32* %a) nounwind {
59
entry:
610
store i32 0, i32* %a
@@ -19,4 +23,6 @@ return: ; preds = %cond_next
1923
}
2024

2125
; CHECK: lsl{{.*}}#2]
26+
; CHECK-NONEGOFF-A53: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2]
27+
; CHECK-NONEGOFF-A57: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2]
2228

0 commit comments

Comments
 (0)