Skip to content

Commit d7aeea6

Browse files
authored
[AArch64] optimise SVE prefetch intrinsics with no active lanes (#103052)
This patch extends #73964 and optimises away SVE prefetch intrinsics when the governing predicate is all-false (i.e. there are no active lanes).
1 parent bfce1aa commit d7aeea6

File tree

2 files changed

+174
-0
lines changed

2 files changed

+174
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2165,6 +2165,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21652165
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
21662166
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
21672167
return instCombineSVENoActiveUnaryZero(IC, II);
2168+
case Intrinsic::aarch64_sve_prf:
2169+
case Intrinsic::aarch64_sve_prfb_gather_index:
2170+
case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
2171+
case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
2172+
case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
2173+
case Intrinsic::aarch64_sve_prfd_gather_index:
2174+
case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
2175+
case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
2176+
case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
2177+
case Intrinsic::aarch64_sve_prfh_gather_index:
2178+
case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
2179+
case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
2180+
case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
2181+
case Intrinsic::aarch64_sve_prfw_gather_index:
2182+
case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
2183+
case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
2184+
case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
2185+
return instCombineSVENoActiveUnaryErase(IC, II, 0);
21682186
case Intrinsic::aarch64_neon_fmaxnm:
21692187
case Intrinsic::aarch64_neon_fminnm:
21702188
return instCombineMaxMinNM(IC, II);
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
3+
target triple = "aarch64-unknown-linux-gnu"
4+
5+
; llvm.aarch64.sve.prf is called with an all-false predicate (zeroinitializer);
; the assertions below expect instcombine to delete the call so that only
; `ret void` remains.
define void @test_prf(ptr %base){
6+
; CHECK-LABEL: define void @test_prf(
7+
; CHECK-SAME: ptr [[BASE:%.*]]) {
8+
; CHECK-NEXT: ret void
9+
;
10+
tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> zeroinitializer, ptr %base, i32 1)
11+
ret void
12+
}
13+
14+
; llvm.aarch64.sve.prfb.gather.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv2i64).
define void @test_prfb_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
15+
; CHECK-LABEL: define void @test_prfb_gather_index(
16+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
17+
; CHECK-NEXT: ret void
18+
;
19+
call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
20+
ret void
21+
}
22+
23+
; llvm.aarch64.sve.prfb.gather.scalar.offset is called with an all-false
; predicate (zeroinitializer); the assertions below expect instcombine to delete
; the call so that only `ret void` remains. The overload suffix uses the
; canonical scalable-vector mangling (nxv4i32).
define void @test_prfb_gather_scalar_offset(<vscale x 4 x i32> %bases){
24+
; CHECK-LABEL: define void @test_prfb_gather_scalar_offset(
25+
; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
26+
; CHECK-NEXT: ret void
27+
;
28+
call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
29+
ret void
30+
}
31+
32+
; llvm.aarch64.sve.prfb.gather.sxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfb_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
33+
; CHECK-LABEL: define void @test_prfb_gather_sxtw_index(
34+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
35+
; CHECK-NEXT: ret void
36+
;
37+
call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
38+
ret void
39+
}
40+
41+
; llvm.aarch64.sve.prfb.gather.uxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfb_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
42+
; CHECK-LABEL: define void @test_prfb_gather_uxtw_index(
43+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
44+
; CHECK-NEXT: ret void
45+
;
46+
call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
47+
ret void
48+
}
49+
50+
; llvm.aarch64.sve.prfd.gather.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv2i64).
define void @test_prfd_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
51+
; CHECK-LABEL: define void @test_prfd_gather_index(
52+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
53+
; CHECK-NEXT: ret void
54+
;
55+
call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
56+
ret void
57+
}
58+
59+
; llvm.aarch64.sve.prfd.gather.scalar.offset is called with an all-false
; predicate (zeroinitializer); the assertions below expect instcombine to delete
; the call so that only `ret void` remains. The overload suffix uses the
; canonical scalable-vector mangling (nxv4i32).
define void @test_prfd_gather_scalar_offset(<vscale x 4 x i32> %bases){
60+
; CHECK-LABEL: define void @test_prfd_gather_scalar_offset(
61+
; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
62+
; CHECK-NEXT: ret void
63+
;
64+
call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
65+
ret void
66+
}
67+
68+
; llvm.aarch64.sve.prfd.gather.sxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfd_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
69+
; CHECK-LABEL: define void @test_prfd_gather_sxtw_index(
70+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
71+
; CHECK-NEXT: ret void
72+
;
73+
call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
74+
ret void
75+
}
76+
77+
; llvm.aarch64.sve.prfd.gather.uxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfd_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
78+
; CHECK-LABEL: define void @test_prfd_gather_uxtw_index(
79+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
80+
; CHECK-NEXT: ret void
81+
;
82+
call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
83+
ret void
84+
}
85+
86+
; llvm.aarch64.sve.prfh.gather.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv2i64).
define void @test_prfh_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
87+
; CHECK-LABEL: define void @test_prfh_gather_index(
88+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
89+
; CHECK-NEXT: ret void
90+
;
91+
call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
92+
ret void
93+
}
94+
95+
; llvm.aarch64.sve.prfh.gather.scalar.offset is called with an all-false
; predicate (zeroinitializer); the assertions below expect instcombine to delete
; the call so that only `ret void` remains. The overload suffix uses the
; canonical scalable-vector mangling (nxv4i32).
define void @test_prfh_gather_scalar_offset(<vscale x 4 x i32> %bases){
96+
; CHECK-LABEL: define void @test_prfh_gather_scalar_offset(
97+
; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
98+
; CHECK-NEXT: ret void
99+
;
100+
call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
101+
ret void
102+
}
103+
104+
; llvm.aarch64.sve.prfh.gather.sxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfh_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
105+
; CHECK-LABEL: define void @test_prfh_gather_sxtw_index(
106+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
107+
; CHECK-NEXT: ret void
108+
;
109+
call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
110+
ret void
111+
}
112+
113+
; llvm.aarch64.sve.prfh.gather.uxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfh_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
114+
; CHECK-LABEL: define void @test_prfh_gather_uxtw_index(
115+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
116+
; CHECK-NEXT: ret void
117+
;
118+
call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
119+
ret void
120+
}
121+
122+
; llvm.aarch64.sve.prfw.gather.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv2i64).
define void @test_prfw_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
123+
; CHECK-LABEL: define void @test_prfw_gather_index(
124+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
125+
; CHECK-NEXT: ret void
126+
;
127+
call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
128+
ret void
129+
}
130+
131+
; llvm.aarch64.sve.prfw.gather.scalar.offset is called with an all-false
; predicate (zeroinitializer); the assertions below expect instcombine to delete
; the call so that only `ret void` remains. The overload suffix uses the
; canonical scalable-vector mangling (nxv4i32).
define void @test_prfw_gather_scalar_offset(<vscale x 4 x i32> %bases){
132+
; CHECK-LABEL: define void @test_prfw_gather_scalar_offset(
133+
; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
134+
; CHECK-NEXT: ret void
135+
;
136+
call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
137+
ret void
138+
}
139+
140+
; llvm.aarch64.sve.prfw.gather.sxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfw_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
141+
; CHECK-LABEL: define void @test_prfw_gather_sxtw_index(
142+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
143+
; CHECK-NEXT: ret void
144+
;
145+
call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
146+
ret void
147+
}
148+
149+
; llvm.aarch64.sve.prfw.gather.uxtw.index is called with an all-false predicate
; (zeroinitializer); the assertions below expect instcombine to delete the call
; so that only `ret void` remains. The overload suffix uses the canonical
; scalable-vector mangling (nxv4i32).
define void @test_prfw_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
150+
; CHECK-LABEL: define void @test_prfw_gather_uxtw_index(
151+
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
152+
; CHECK-NEXT: ret void
153+
;
154+
call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
155+
ret void
156+
}

0 commit comments

Comments
 (0)