Skip to content

Commit df9c746

Browse files
SC llvm team
authored and committed
Merged main:37daff028fcec27f2be1bb990df77e19c0244ccf into amd-gfx:2c4204e0a79d
Local branch amd-gfx 2c4204e: Merged main:ba692301f1697183d1665cc0f410d4235b3036db into amd-gfx:b0a602bc789c. Remote branch main 37daff0: [X86] setcc-lowering.ll - regenerate with AVX2 test coverage
2 parents 2c4204e + 37daff0 commit df9c746

26 files changed

+1181
-611
lines changed

clang/lib/Analysis/UnsafeBufferUsage.cpp

Lines changed: 129 additions & 130 deletions
Large diffs are not rendered by default.

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 490982
19+
#define LLVM_MAIN_REVISION 490988
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/AArch64/AArch64FastISel.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5179,7 +5179,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
51795179
const TargetLibraryInfo *LibInfo) {
51805180

51815181
SMEAttrs CallerAttrs(*FuncInfo.Fn);
5182-
if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
5182+
if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5183+
CallerAttrs.hasStreamingInterfaceOrBody() ||
51835184
CallerAttrs.hasStreamingCompatibleInterface())
51845185
return nullptr;
51855186
return new AArch64FastISel(FuncInfo, LibInfo);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25892,7 +25892,8 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
2589225892
auto CallerAttrs = SMEAttrs(*Inst.getFunction());
2589325893
auto CalleeAttrs = SMEAttrs(*Base);
2589425894
if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
25895-
CallerAttrs.requiresLazySave(CalleeAttrs))
25895+
CallerAttrs.requiresLazySave(CalleeAttrs) ||
25896+
CallerAttrs.requiresPreservingZT0(CalleeAttrs))
2589625897
return true;
2589725898
}
2589825899
return false;

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -535,7 +535,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
535535
}
536536

537537
SMEAttrs Attrs(F);
538-
if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() ||
538+
if (Attrs.hasZAState() || Attrs.hasZT0State() ||
539+
Attrs.hasStreamingInterfaceOrBody() ||
539540
Attrs.hasStreamingCompatibleInterface())
540541
return true;
541542

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
5252
const LLT v16s8 = LLT::fixed_vector(16, 8);
5353
const LLT v8s8 = LLT::fixed_vector(8, 8);
5454
const LLT v4s8 = LLT::fixed_vector(4, 8);
55+
const LLT v2s8 = LLT::fixed_vector(2, 8);
5556
const LLT v8s16 = LLT::fixed_vector(8, 16);
5657
const LLT v4s16 = LLT::fixed_vector(4, 16);
5758
const LLT v2s16 = LLT::fixed_vector(2, 16);
@@ -422,8 +423,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
422423
.clampMaxNumElements(0, s64, 2)
423424
.clampMaxNumElements(0, p0, 2)
424425
.lowerIfMemSizeNotPow2()
426+
// TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
427+
.bitcastIf(typeInSet(0, {v4s8}),
428+
[=](const LegalityQuery &Query) {
429+
const LLT VecTy = Query.Types[0];
430+
return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
431+
})
425432
.customIf(IsPtrVecPred)
426-
.scalarizeIf(typeIs(0, v2s16), 0);
433+
.scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
427434

428435
getActionDefinitionsBuilder(G_INDEXED_STORE)
429436
// Idx 0 == Ptr, Idx 1 == Val

llvm/test/CodeGen/AArch64/load.ll

Lines changed: 313 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,313 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; CHECK-GI: warning: Instruction selection used fallback path for load_v2i8
6+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for load_v4i8
7+
8+
; ===== Legal Scalars =====
9+
10+
define i8 @load_i8(ptr %ptr){
11+
; CHECK-LABEL: load_i8:
12+
; CHECK: // %bb.0:
13+
; CHECK-NEXT: ldrb w0, [x0]
14+
; CHECK-NEXT: ret
15+
%a = load i8 , ptr %ptr
16+
ret i8 %a
17+
}
18+
19+
define i16 @load_i16(ptr %ptr){
20+
; CHECK-LABEL: load_i16:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: ldrh w0, [x0]
23+
; CHECK-NEXT: ret
24+
%a = load i16 , ptr %ptr
25+
ret i16 %a
26+
}
27+
28+
define i32 @load_i32(ptr %ptr){
29+
; CHECK-LABEL: load_i32:
30+
; CHECK: // %bb.0:
31+
; CHECK-NEXT: ldr w0, [x0]
32+
; CHECK-NEXT: ret
33+
%a = load i32 , ptr %ptr
34+
ret i32 %a
35+
}
36+
37+
define i64 @load_i64(ptr %ptr){
38+
; CHECK-LABEL: load_i64:
39+
; CHECK: // %bb.0:
40+
; CHECK-NEXT: ldr x0, [x0]
41+
; CHECK-NEXT: ret
42+
%a = load i64 , ptr %ptr
43+
ret i64 %a
44+
}
45+
46+
; ===== Legal Vector Types =====
47+
48+
define <8 x i8> @load_v8i8(ptr %ptr){
49+
; CHECK-LABEL: load_v8i8:
50+
; CHECK: // %bb.0:
51+
; CHECK-NEXT: ldr d0, [x0]
52+
; CHECK-NEXT: ret
53+
%a = load <8 x i8>, ptr %ptr
54+
ret <8 x i8> %a
55+
}
56+
57+
define <16 x i8> @load_v16i8(ptr %ptr){
58+
; CHECK-LABEL: load_v16i8:
59+
; CHECK: // %bb.0:
60+
; CHECK-NEXT: ldr q0, [x0]
61+
; CHECK-NEXT: ret
62+
%a = load <16 x i8>, ptr %ptr
63+
ret <16 x i8> %a
64+
}
65+
66+
define <4 x i16> @load_v4i16(ptr %ptr){
67+
; CHECK-LABEL: load_v4i16:
68+
; CHECK: // %bb.0:
69+
; CHECK-NEXT: ldr d0, [x0]
70+
; CHECK-NEXT: ret
71+
%a = load <4 x i16>, ptr %ptr
72+
ret <4 x i16> %a
73+
}
74+
75+
define <8 x i16> @load_v8i16(ptr %ptr){
76+
; CHECK-LABEL: load_v8i16:
77+
; CHECK: // %bb.0:
78+
; CHECK-NEXT: ldr q0, [x0]
79+
; CHECK-NEXT: ret
80+
%a = load <8 x i16>, ptr %ptr
81+
ret <8 x i16> %a
82+
}
83+
84+
define <2 x i32> @load_v2i32(ptr %ptr){
85+
; CHECK-LABEL: load_v2i32:
86+
; CHECK: // %bb.0:
87+
; CHECK-NEXT: ldr d0, [x0]
88+
; CHECK-NEXT: ret
89+
%a = load <2 x i32>, ptr %ptr
90+
ret <2 x i32> %a
91+
}
92+
93+
define <4 x i32> @load_v4i32(ptr %ptr){
94+
; CHECK-LABEL: load_v4i32:
95+
; CHECK: // %bb.0:
96+
; CHECK-NEXT: ldr q0, [x0]
97+
; CHECK-NEXT: ret
98+
%a = load <4 x i32>, ptr %ptr
99+
ret <4 x i32> %a
100+
}
101+
102+
define <2 x i64> @load_v2i64(ptr %ptr){
103+
; CHECK-LABEL: load_v2i64:
104+
; CHECK: // %bb.0:
105+
; CHECK-NEXT: ldr q0, [x0]
106+
; CHECK-NEXT: ret
107+
%a = load <2 x i64>, ptr %ptr
108+
ret <2 x i64> %a
109+
}
110+
111+
; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
112+
113+
define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b){
114+
; CHECK-LABEL: load_v2i8:
115+
; CHECK: // %bb.0:
116+
; CHECK-NEXT: ld1 { v0.b }[0], [x0]
117+
; CHECK-NEXT: add x8, x0, #1
118+
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
119+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
120+
; CHECK-NEXT: ret
121+
%a = load <2 x i8>, ptr %ptr
122+
ret <2 x i8> %a
123+
}
124+
125+
define i32 @load_v4i8(ptr %ptr, <4 x i8> %b){
126+
; CHECK-LABEL: load_v4i8:
127+
; CHECK: // %bb.0:
128+
; CHECK-NEXT: ldr w0, [x0]
129+
; CHECK-NEXT: ret
130+
%a = load <4 x i8>, ptr %ptr
131+
%c = bitcast <4 x i8> %a to i32
132+
ret i32 %c
133+
}
134+
135+
define <32 x i8> @load_v32i8(ptr %ptr){
136+
; CHECK-LABEL: load_v32i8:
137+
; CHECK: // %bb.0:
138+
; CHECK-NEXT: ldp q0, q1, [x0]
139+
; CHECK-NEXT: ret
140+
%a = load <32 x i8>, ptr %ptr
141+
ret <32 x i8> %a
142+
}
143+
144+
define <2 x i16> @load_v2i16(ptr %ptr){
145+
; CHECK-SD-LABEL: load_v2i16:
146+
; CHECK-SD: // %bb.0:
147+
; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0]
148+
; CHECK-SD-NEXT: add x8, x0, #2
149+
; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8]
150+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
151+
; CHECK-SD-NEXT: ret
152+
;
153+
; CHECK-GI-LABEL: load_v2i16:
154+
; CHECK-GI: // %bb.0:
155+
; CHECK-GI-NEXT: ldr h0, [x0]
156+
; CHECK-GI-NEXT: ldr h1, [x0, #2]
157+
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
158+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
159+
; CHECK-GI-NEXT: ret
160+
%a = load <2 x i16>, ptr %ptr
161+
ret <2 x i16> %a
162+
}
163+
164+
define <16 x i16> @load_v16i16(ptr %ptr){
165+
; CHECK-LABEL: load_v16i16:
166+
; CHECK: // %bb.0:
167+
; CHECK-NEXT: ldp q0, q1, [x0]
168+
; CHECK-NEXT: ret
169+
%a = load <16 x i16>, ptr %ptr
170+
ret <16 x i16> %a
171+
}
172+
173+
define <1 x i32> @load_v1i32(ptr %ptr){
174+
; CHECK-LABEL: load_v1i32:
175+
; CHECK: // %bb.0:
176+
; CHECK-NEXT: ldr s0, [x0]
177+
; CHECK-NEXT: ret
178+
%a = load <1 x i32>, ptr %ptr
179+
ret <1 x i32> %a
180+
}
181+
182+
define <8 x i32> @load_v8i32(ptr %ptr){
183+
; CHECK-LABEL: load_v8i32:
184+
; CHECK: // %bb.0:
185+
; CHECK-NEXT: ldp q0, q1, [x0]
186+
; CHECK-NEXT: ret
187+
%a = load <8 x i32>, ptr %ptr
188+
ret <8 x i32> %a
189+
}
190+
191+
define <4 x i64> @load_v4i64(ptr %ptr){
192+
; CHECK-LABEL: load_v4i64:
193+
; CHECK: // %bb.0:
194+
; CHECK-NEXT: ldp q0, q1, [x0]
195+
; CHECK-NEXT: ret
196+
%a = load <4 x i64>, ptr %ptr
197+
ret <4 x i64> %a
198+
}
199+
200+
; ===== Vectors with Non-Pow 2 Widths =====
201+
202+
define <3 x i8> @load_v3i8(ptr %ptr){
203+
; CHECK-SD-LABEL: load_v3i8:
204+
; CHECK-SD: // %bb.0:
205+
; CHECK-SD-NEXT: ldr s0, [x0]
206+
; CHECK-SD-NEXT: umov w0, v0.b[0]
207+
; CHECK-SD-NEXT: umov w1, v0.b[1]
208+
; CHECK-SD-NEXT: umov w2, v0.b[2]
209+
; CHECK-SD-NEXT: ret
210+
;
211+
; CHECK-GI-LABEL: load_v3i8:
212+
; CHECK-GI: // %bb.0:
213+
; CHECK-GI-NEXT: ldrb w8, [x0]
214+
; CHECK-GI-NEXT: ldrb w1, [x0, #1]
215+
; CHECK-GI-NEXT: ldrb w2, [x0, #2]
216+
; CHECK-GI-NEXT: mov w0, w8
217+
; CHECK-GI-NEXT: ret
218+
%a = load <3 x i8>, ptr %ptr
219+
ret <3 x i8> %a
220+
}
221+
222+
define <7 x i8> @load_v7i8(ptr %ptr){
223+
; CHECK-SD-LABEL: load_v7i8:
224+
; CHECK-SD: // %bb.0:
225+
; CHECK-SD-NEXT: ldr d0, [x0]
226+
; CHECK-SD-NEXT: ret
227+
;
228+
; CHECK-GI-LABEL: load_v7i8:
229+
; CHECK-GI: // %bb.0:
230+
; CHECK-GI-NEXT: ldr b0, [x0]
231+
; CHECK-GI-NEXT: ldr b1, [x0, #1]
232+
; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
233+
; CHECK-GI-NEXT: ldr b1, [x0, #2]
234+
; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
235+
; CHECK-GI-NEXT: ldr b1, [x0, #3]
236+
; CHECK-GI-NEXT: mov v0.b[3], v1.b[0]
237+
; CHECK-GI-NEXT: ldr b1, [x0, #4]
238+
; CHECK-GI-NEXT: mov v0.b[4], v1.b[0]
239+
; CHECK-GI-NEXT: ldr b1, [x0, #5]
240+
; CHECK-GI-NEXT: mov v0.b[5], v1.b[0]
241+
; CHECK-GI-NEXT: ldr b1, [x0, #6]
242+
; CHECK-GI-NEXT: mov v0.b[6], v1.b[0]
243+
; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
244+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
245+
; CHECK-GI-NEXT: ret
246+
%a = load <7 x i8>, ptr %ptr
247+
ret <7 x i8> %a
248+
}
249+
250+
define <3 x i16> @load_v3i16(ptr %ptr){
251+
; CHECK-SD-LABEL: load_v3i16:
252+
; CHECK-SD: // %bb.0:
253+
; CHECK-SD-NEXT: ldr d0, [x0]
254+
; CHECK-SD-NEXT: ret
255+
;
256+
; CHECK-GI-LABEL: load_v3i16:
257+
; CHECK-GI: // %bb.0:
258+
; CHECK-GI-NEXT: ldr h0, [x0]
259+
; CHECK-GI-NEXT: ldr h1, [x0, #2]
260+
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
261+
; CHECK-GI-NEXT: ldr h1, [x0, #4]
262+
; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
263+
; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
264+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
265+
; CHECK-GI-NEXT: ret
266+
%a = load <3 x i16>, ptr %ptr
267+
ret <3 x i16> %a
268+
}
269+
270+
define <7 x i16> @load_v7i16(ptr %ptr){
271+
; CHECK-SD-LABEL: load_v7i16:
272+
; CHECK-SD: // %bb.0:
273+
; CHECK-SD-NEXT: ldr q0, [x0]
274+
; CHECK-SD-NEXT: ret
275+
;
276+
; CHECK-GI-LABEL: load_v7i16:
277+
; CHECK-GI: // %bb.0:
278+
; CHECK-GI-NEXT: ldr h0, [x0]
279+
; CHECK-GI-NEXT: ldr h1, [x0, #2]
280+
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
281+
; CHECK-GI-NEXT: ldr h1, [x0, #4]
282+
; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
283+
; CHECK-GI-NEXT: ldr h1, [x0, #6]
284+
; CHECK-GI-NEXT: mov v0.h[3], v1.h[0]
285+
; CHECK-GI-NEXT: ldr h1, [x0, #8]
286+
; CHECK-GI-NEXT: mov v0.h[4], v1.h[0]
287+
; CHECK-GI-NEXT: ldr h1, [x0, #10]
288+
; CHECK-GI-NEXT: mov v0.h[5], v1.h[0]
289+
; CHECK-GI-NEXT: ldr h1, [x0, #12]
290+
; CHECK-GI-NEXT: mov v0.h[6], v1.h[0]
291+
; CHECK-GI-NEXT: mov v0.h[7], v0.h[0]
292+
; CHECK-GI-NEXT: ret
293+
%a = load <7 x i16>, ptr %ptr
294+
ret <7 x i16> %a
295+
}
296+
297+
define <3 x i32> @load_v3i32(ptr %ptr){
298+
; CHECK-SD-LABEL: load_v3i32:
299+
; CHECK-SD: // %bb.0:
300+
; CHECK-SD-NEXT: ldr q0, [x0]
301+
; CHECK-SD-NEXT: ret
302+
;
303+
; CHECK-GI-LABEL: load_v3i32:
304+
; CHECK-GI: // %bb.0:
305+
; CHECK-GI-NEXT: ldp s0, s1, [x0]
306+
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
307+
; CHECK-GI-NEXT: ldr s1, [x0, #8]
308+
; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
309+
; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
310+
; CHECK-GI-NEXT: ret
311+
%a = load <3 x i32>, ptr %ptr
312+
ret <3 x i32> %a
313+
}

0 commit comments

Comments
 (0)