Skip to content

Commit b53e4bf

Browse files
author
git apple-llvm automerger
committed
Merge commit 'e9dcc15f514a' from llvm.org/release/17.x into stable/20230725
2 parents 9b7dff1 + e9dcc15 commit b53e4bf

File tree

3 files changed

+217
-0
lines changed

3 files changed

+217
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,30 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
212212
return (CallerBits & CalleeBits) == CalleeBits;
213213
}
214214

215+
// Returns true only if the base-class check accepts Types and, when the
// subtarget uses SVE for fixed-length vectors, none of the entries in Types
// is a fixed-length vector wider than 128 bits.
bool AArch64TTIImpl::areTypesABICompatible(
216+
const Function *Caller, const Function *Callee,
217+
const ArrayRef<Type *> &Types) const {
218+
if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
219+
return false;
220+
221+
// We need to ensure that argument promotion does not attempt to promote
222+
// pointers to fixed-length vector types larger than 128 bits like
223+
// <8 x float> (and pointers to aggregate types which have such fixed-length
224+
// vector type members) into the values of the pointees. Such vector types
225+
// are used for SVE VLS but there is no ABI for SVE VLS arguments and the
226+
// backend cannot lower such value arguments. The 128-bit fixed-length SVE
227+
// types can be safely treated as 128-bit NEON types and they cannot be
228+
// distinguished in IR.
229+
if (ST->useSVEForFixedLengthVectors() && llvm::any_of(Types, [](Type *Ty) {
230+
auto FVTy = dyn_cast<FixedVectorType>(Ty);
231+
return FVTy &&
232+
FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
233+
}))
234+
return false;
235+
236+
return true;
237+
}
238+
215239
bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
216240
TargetTransformInfo::RegisterKind K) const {
217241
assert(K != TargetTransformInfo::RGK_Scalar);

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
7777
bool areInlineCompatible(const Function *Caller,
7878
const Function *Callee) const;
7979

80+
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
81+
const ArrayRef<Type *> &Types) const;
82+
8083
/// \name Scalar TTI Implementations
8184
/// @{
8285

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
3+
; RUN: opt -S -passes=argpromotion -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
; Don't promote a vector pointer argument when the pointee type size is greater
8+
; than 128 bits.
9+
10+
define dso_local void @caller_8xi32(ptr noalias %src, ptr noalias %dst) #0 {
11+
; CHECK-LABEL: define dso_local void @caller_8xi32(
12+
; CHECK-NEXT: entry:
13+
; CHECK-NEXT: call fastcc void @callee_8xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]])
14+
; CHECK-NEXT: ret void
15+
;
16+
entry:
17+
call fastcc void @callee_8xi32(ptr noalias %src, ptr noalias %dst)
18+
ret void
19+
}
20+
21+
define internal fastcc void @callee_8xi32(ptr noalias %src, ptr noalias %dst) #0 {
22+
; CHECK-LABEL: define internal fastcc void @callee_8xi32(
23+
; CHECK-NEXT: entry:
24+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[SRC:%.*]], align 16
25+
; CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[DST:%.*]], align 16
26+
; CHECK-NEXT: ret void
27+
;
28+
entry:
29+
%0 = load <8 x i32>, ptr %src, align 16
30+
store <8 x i32> %0, ptr %dst, align 16
31+
ret void
32+
}
33+
34+
; Promote a vector pointer argument when the pointee type size is 128 bits or
35+
; less.
36+
37+
define dso_local void @caller_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
38+
; CHECK-LABEL: define dso_local void @caller_4xi32(
39+
; CHECK-NEXT: entry:
40+
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
41+
; CHECK-NEXT: call fastcc void @callee_4xi32(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
42+
; CHECK-NEXT: ret void
43+
;
44+
entry:
45+
call fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst)
46+
ret void
47+
}
48+
49+
define internal fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
50+
; CHECK-LABEL: define internal fastcc void @callee_4xi32(
51+
; CHECK-NEXT: entry:
52+
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
53+
; CHECK-NEXT: ret void
54+
;
55+
entry:
56+
%0 = load <4 x i32>, ptr %src, align 16
57+
store <4 x i32> %0, ptr %dst, align 16
58+
ret void
59+
}
60+
61+
; A scalar pointer argument is promoted even when the pointee type size is
62+
; greater than 128 bits.
63+
64+
define dso_local void @caller_i256(ptr noalias %src, ptr noalias %dst) #0 {
65+
; CHECK-LABEL: define dso_local void @caller_i256(
66+
; CHECK-NEXT: entry:
67+
; CHECK-NEXT: [[SRC_VAL:%.*]] = load i256, ptr [[SRC:%.*]], align 16
68+
; CHECK-NEXT: call fastcc void @callee_i256(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
69+
; CHECK-NEXT: ret void
70+
;
71+
entry:
72+
call fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst)
73+
ret void
74+
}
75+
76+
define internal fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) #0 {
77+
; CHECK-LABEL: define internal fastcc void @callee_i256(
78+
; CHECK-NEXT: entry:
79+
; CHECK-NEXT: store i256 [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
80+
; CHECK-NEXT: ret void
81+
;
82+
entry:
83+
%0 = load i256, ptr %src, align 16
84+
store i256 %0, ptr %dst, align 16
85+
ret void
86+
}
87+
88+
; A scalable vector pointer argument is not a target of ArgumentPromotionPass.
89+
90+
define dso_local void @caller_nx4xi32(ptr noalias %src, ptr noalias %dst) #2 {
91+
; CHECK-LABEL: define dso_local void @caller_nx4xi32(
92+
; CHECK-NEXT: entry:
93+
; CHECK-NEXT: call fastcc void @callee_nx4xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]])
94+
; CHECK-NEXT: ret void
95+
;
96+
entry:
97+
call fastcc void @callee_nx4xi32(ptr noalias %src, ptr noalias %dst)
98+
ret void
99+
}
100+
101+
define internal fastcc void @callee_nx4xi32(ptr noalias %src, ptr noalias %dst) #2 {
102+
; CHECK-LABEL: define internal fastcc void @callee_nx4xi32(
103+
; CHECK-NEXT: entry:
104+
; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i32>, ptr [[SRC:%.*]], align 16
105+
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP0]], ptr [[DST:%.*]], align 16
106+
; CHECK-NEXT: ret void
107+
;
108+
entry:
109+
%0 = load <vscale x 4 x i32>, ptr %src, align 16
110+
store <vscale x 4 x i32> %0, ptr %dst, align 16
111+
ret void
112+
}
113+
114+
; Don't promote a structure pointer argument when the pointee vector member
115+
; type size is greater than 128 bits.
116+
117+
%struct_8xi32 = type { <8 x i32>, <8 x i32> }
118+
119+
define dso_local void @caller_struct8xi32(ptr noalias %src, ptr noalias %dst) #0 {
120+
; CHECK-LABEL: define dso_local void @caller_struct8xi32(
121+
; CHECK-NEXT: entry:
122+
; CHECK-NEXT: call fastcc void @callee_struct8xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]])
123+
; CHECK-NEXT: ret void
124+
;
125+
entry:
126+
call fastcc void @callee_struct8xi32(ptr noalias %src, ptr noalias %dst)
127+
ret void
128+
}
129+
130+
define internal fastcc void @callee_struct8xi32(ptr noalias %src, ptr noalias %dst) #0 {
131+
; CHECK-LABEL: define internal fastcc void @callee_struct8xi32(
132+
; CHECK-NEXT: entry:
133+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[SRC:%.*]], align 16
134+
; CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[DST:%.*]], align 16
135+
; CHECK-NEXT: [[SRC2:%.*]] = getelementptr inbounds [[STRUCT_8XI32:%.*]], ptr [[SRC]], i64 0, i32 1
136+
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[SRC2]], align 16
137+
; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_8XI32]], ptr [[DST]], i64 0, i32 1
138+
; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[DST2]], align 16
139+
; CHECK-NEXT: ret void
140+
;
141+
entry:
142+
%0 = load <8 x i32>, ptr %src, align 16
143+
store <8 x i32> %0, ptr %dst, align 16
144+
%src2 = getelementptr inbounds %struct_8xi32, ptr %src, i64 0, i32 1
145+
%1 = load <8 x i32>, ptr %src2, align 16
146+
%dst2 = getelementptr inbounds %struct_8xi32, ptr %dst, i64 0, i32 1
147+
store <8 x i32> %1, ptr %dst2, align 16
148+
ret void
149+
}
150+
151+
; Promote a structure pointer argument when the pointee vector member type size
152+
; is 128 bits or less.
153+
154+
%struct_4xi32 = type { <4 x i32>, <4 x i32> }
155+
156+
define dso_local void @caller_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
157+
; CHECK-LABEL: define dso_local void @caller_struct4xi32(
158+
; CHECK-NEXT: entry:
159+
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
160+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC]], i64 16
161+
; CHECK-NEXT: [[SRC_VAL1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
162+
; CHECK-NEXT: call fastcc void @callee_struct4xi32(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
163+
; CHECK-NEXT: ret void
164+
;
165+
entry:
166+
call fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst)
167+
ret void
168+
}
169+
170+
define internal fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
171+
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32(
172+
; CHECK-NEXT: entry:
173+
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
174+
; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_4XI32:%.*]], ptr [[DST]], i64 0, i32 1
175+
; CHECK-NEXT: store <4 x i32> [[SRC_16_VAL:%.*]], ptr [[DST2]], align 16
176+
; CHECK-NEXT: ret void
177+
;
178+
entry:
179+
%0 = load <4 x i32>, ptr %src, align 16
180+
store <4 x i32> %0, ptr %dst, align 16
181+
%src2 = getelementptr inbounds %struct_4xi32, ptr %src, i64 0, i32 1
182+
%1 = load <4 x i32>, ptr %src2, align 16
183+
%dst2 = getelementptr inbounds %struct_4xi32, ptr %dst, i64 0, i32 1
184+
store <4 x i32> %1, ptr %dst2, align 16
185+
ret void
186+
}
187+
188+
attributes #0 = { noinline vscale_range(2,2) "target-features"="+v8.2a,+neon,+sve" }
189+
attributes #1 = { noinline vscale_range(1,1) "target-features"="+v8.2a,+neon,+sve" }
190+
attributes #2 = { noinline "target-features"="+v8.2a,+neon,+sve" }

0 commit comments

Comments
 (0)