Skip to content

Commit 6d7c09c

Browse files
committed
[SelectOpt] Enable for Apple CPUs.
1 parent 688f859 commit 6d7c09c

File tree

3 files changed

+66
-22
lines changed

3 files changed

+66
-22
lines changed

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
338338
FeatureArithmeticBccFusion,
339339
FeatureArithmeticCbzFusion,
340340
FeatureDisableLatencySchedHeuristic,
341+
FeatureEnableSelectOptimize,
341342
FeatureFuseAddress,
342343
FeatureFuseAES,
343344
FeatureFuseArithmeticLogic,
@@ -354,6 +355,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
354355
FeatureArithmeticBccFusion,
355356
FeatureArithmeticCbzFusion,
356357
FeatureDisableLatencySchedHeuristic,
358+
FeatureEnableSelectOptimize,
357359
FeatureFuseAddress,
358360
FeatureFuseAES,
359361
FeatureFuseArithmeticLogic,
@@ -370,6 +372,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
370372
FeatureArithmeticBccFusion,
371373
FeatureArithmeticCbzFusion,
372374
FeatureDisableLatencySchedHeuristic,
375+
FeatureEnableSelectOptimize,
373376
FeatureFuseAddress,
374377
FeatureFuseAdrpAdd,
375378
FeatureFuseAES,
@@ -387,6 +390,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
387390
FeatureArithmeticBccFusion,
388391
FeatureArithmeticCbzFusion,
389392
FeatureDisableLatencySchedHeuristic,
393+
FeatureEnableSelectOptimize,
390394
FeatureFuseAddress,
391395
FeatureFuseAdrpAdd,
392396
FeatureFuseAES,
@@ -404,6 +408,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
404408
FeatureArithmeticBccFusion,
405409
FeatureArithmeticCbzFusion,
406410
FeatureDisableLatencySchedHeuristic,
411+
FeatureEnableSelectOptimize,
407412
FeatureFuseAddress,
408413
FeatureFuseAES,
409414
FeatureFuseArithmeticLogic,

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4573,6 +4573,21 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
45734573

45744574
bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
45754575
if (EnableOrLikeSelectOpt) {
4576+
switch (ST->getProcFamily()) {
4577+
case AArch64Subtarget::AppleA14:
4578+
case AArch64Subtarget::AppleA15:
4579+
case AArch64Subtarget::AppleA16:
4580+
case AArch64Subtarget::AppleM4:
4581+
// Only treat Adds/Subs whose first operand also feeds a GEP as select-like.
4582+
if (I->getOpcode() == Instruction::Add ||
4583+
I->getOpcode() == Instruction::Sub)
4584+
return any_of(I->getOperand(0)->users(),
4585+
[](User *U) { return isa<GetElementPtrInst>(U); });
4586+
return false;
4587+
default:
4588+
break;
4589+
}
4590+
45764591
// For the binary operators (e.g. or) we need to be more careful than
45774592
// selects, here we only transform them if they are already at a natural
45784593
// break point in the code - the end of a block with an unconditional

llvm/test/CodeGen/AArch64/selectopt-apple-defaults.ll

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,52 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-a7 -o - %s | FileCheck %s
3-
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m1 -o - %s | FileCheck %s
4-
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m2 -o - %s | FileCheck %s
5-
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m3 -o - %s | FileCheck %s
6-
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m4 -o - %s | FileCheck %s
2+
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-a7 -o - %s | FileCheck --check-prefix=DISABLED %s
3+
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m1 -o - %s | FileCheck --check-prefix=ENABLED %s
4+
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m2 -o - %s | FileCheck --check-prefix=ENABLED %s
5+
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m3 -o - %s | FileCheck --check-prefix=ENABLED %s
6+
; RUN: llc -mtriple=arm64-apple-macosx -O3 -early-ifcvt-limit=0 -select-opti-loop-cycle-gain-threshold=2 -select-opti-loop-gradient-gain-threshold=10 -mcpu=apple-m4 -o - %s | FileCheck --check-prefix=ENABLED %s
77

88
define void @test_select_opt(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.start) {
9-
; CHECK-LABEL: test_select_opt:
10-
; CHECK: ; %bb.0: ; %entry
11-
; CHECK-NEXT: add x8, x2, #1
12-
; CHECK-NEXT: LBB0_1: ; %loop
13-
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
14-
; CHECK-NEXT: ldr x9, [x1, x4, lsl #3]
15-
; CHECK-NEXT: ldr x10, [x1, x2, lsl #3]
16-
; CHECK-NEXT: cmp x9, x10
17-
; CHECK-NEXT: cset w9, lo
18-
; CHECK-NEXT: cinc x2, x2, lo
19-
; CHECK-NEXT: sub x9, x4, x9
20-
; CHECK-NEXT: str x2, [x0, x9, lsl #3]
21-
; CHECK-NEXT: mov x4, x2
22-
; CHECK-NEXT: subs x8, x8, #1
23-
; CHECK-NEXT: b.ne LBB0_1
24-
; CHECK-NEXT: ; %bb.2: ; %exit
25-
; CHECK-NEXT: ret
9+
; DISABLED-LABEL: test_select_opt:
10+
; DISABLED: ; %bb.0: ; %entry
11+
; DISABLED-NEXT: add x8, x2, #1
12+
; DISABLED-NEXT: LBB0_1: ; %loop
13+
; DISABLED-NEXT: ; =>This Inner Loop Header: Depth=1
14+
; DISABLED-NEXT: ldr x9, [x1, x4, lsl #3]
15+
; DISABLED-NEXT: ldr x10, [x1, x2, lsl #3]
16+
; DISABLED-NEXT: cmp x9, x10
17+
; DISABLED-NEXT: cset w9, lo
18+
; DISABLED-NEXT: cinc x2, x2, lo
19+
; DISABLED-NEXT: sub x9, x4, x9
20+
; DISABLED-NEXT: str x2, [x0, x9, lsl #3]
21+
; DISABLED-NEXT: mov x4, x2
22+
; DISABLED-NEXT: subs x8, x8, #1
23+
; DISABLED-NEXT: b.ne LBB0_1
24+
; DISABLED-NEXT: ; %bb.2: ; %exit
25+
; DISABLED-NEXT: ret
26+
;
27+
; ENABLED-LABEL: test_select_opt:
28+
; ENABLED: ; %bb.0: ; %entry
29+
; ENABLED-NEXT: add x8, x2, #1
30+
; ENABLED-NEXT: b LBB0_2
31+
; ENABLED-NEXT: LBB0_1: ; %select.end
32+
; ENABLED-NEXT: ; in Loop: Header=BB0_2 Depth=1
33+
; ENABLED-NEXT: str x2, [x0, x4, lsl #3]
34+
; ENABLED-NEXT: mov x4, x2
35+
; ENABLED-NEXT: subs x8, x8, #1
36+
; ENABLED-NEXT: b.eq LBB0_4
37+
; ENABLED-NEXT: LBB0_2: ; %loop
38+
; ENABLED-NEXT: ; =>This Inner Loop Header: Depth=1
39+
; ENABLED-NEXT: ldr x9, [x1, x4, lsl #3]
40+
; ENABLED-NEXT: ldr x10, [x1, x2, lsl #3]
41+
; ENABLED-NEXT: cmp x9, x10
42+
; ENABLED-NEXT: b.hs LBB0_1
43+
; ENABLED-NEXT: ; %bb.3: ; %select.true.sink
44+
; ENABLED-NEXT: ; in Loop: Header=BB0_2 Depth=1
45+
; ENABLED-NEXT: add x2, x2, #1
46+
; ENABLED-NEXT: sub x4, x4, #1
47+
; ENABLED-NEXT: b LBB0_1
48+
; ENABLED-NEXT: LBB0_4: ; %exit
49+
; ENABLED-NEXT: ret
2650
entry:
2751
br label %loop
2852

0 commit comments

Comments
 (0)