Skip to content

Commit fadd1ec

Browse files
[AArch64] Enable CmpBcc fusion for Neoverse-v2 (#90608)
This adds compare and branch instruction fusion for Neoverse V2. According to the Software Optimization Guide, the specific AArch64 instruction pairs that can be fused are as follows: CMP/CMN (immediate) + B.cond; CMP/CMN (register) + B.cond. Performance for SPEC2017 is neutral, but another benchmark improves significantly. Results for SPEC2017 on a Neoverse V2: 500.perlbench 0%, 502.gcc_r 0%, 505.mcf_r -0.15%, 523.xalancbmk_r -0.43%, 525.x264_r 0%, 531.deepsjeng_r 0%, 541.leela_r -0.16%, 557.xz_r -0.47%.
1 parent 2635d04 commit fadd1ec

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
484484
def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
485485
"Neoverse V2 ARM processors", [
486486
FeatureFuseAES,
487+
FeatureCmpBccFusion,
487488
FeatureFuseAdrpAdd,
488489
FeatureALULSLFast,
489490
FeaturePostRAScheduler,
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; All llc invocations below are verified against the same expected output.
; The first enables cmp-bcc-fusion explicitly; the others select a CPU by
; name (presumably CPUs whose tuning includes that feature -- confirm in
; AArch64Processors.td).
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mattr=cmp-bcc-fusion | FileCheck %s
2+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s
3+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s
4+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78ae | FileCheck %s
5+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78c | FileCheck %s
6+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a710 | FileCheck %s
7+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a715 | FileCheck %s
8+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a720 | FileCheck %s
9+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a720ae | FileCheck %s
10+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s
11+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x2 | FileCheck %s
12+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=neoverse-v2 | FileCheck %s
13+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1 | FileCheck %s
14+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1a | FileCheck %s
15+
; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1b | FileCheck %s
16+
17+
18+
; Compares %x with %y, stores %x through %arr, then branches on the result of
; the comparison. In the IR the store sits between the icmp and the br; the
; expected assembly further down has the str first and the subs immediately
; followed by the conditional branch.
define void @test_cmp_bcc_fusion(i32 %x, i32 %y, i32* %arr) {
19+
entry:
20+
%cmp = icmp eq i32 %x, %y
21+
; Unrelated memory write placed between the compare and the branch that uses it.
store i32 %x, i32* %arr, align 4
22+
br i1 %cmp, label %if_true, label %if_false
23+
24+
if_true:
25+
ret void
26+
27+
if_false:
28+
ret void
29+
}
30+
31+
; Expected output: the store comes first, then the flag-setting subs, then
; b.ne on the very next line, then the unconditional branch.
; CHECK-LABEL: test_cmp_bcc_fusion:
32+
; CHECK: str {{w[0-9]}}, [{{x[0-9]}}]
33+
; CHECK-NEXT: subs {{w[0-9]}}, {{w[0-9]}}, {{w[0-9]}}
34+
; CHECK-NEXT: b.ne .LBB0_2
35+
; CHECK-NEXT: b .LBB0_1

0 commit comments

Comments
 (0)