Skip to content

Commit 8d03631

Browse files
committed
use TypeWidenVector for most illegal vector types
1 parent 2e3729b commit 8d03631

File tree

4 files changed

+633
-0
lines changed

4 files changed

+633
-0
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6570,3 +6570,12 @@ bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
65706570

65716571
return true;
65726572
}
6573+
6574+
/// Select the legalization action for the vector type \p VT.
///
/// Scalable vectors, vectors whose element count is exactly one, and vectors
/// whose element type is i1 are handed to the generic TargetLoweringBase
/// implementation. Every other vector type is reported as TypeWidenVector.
TargetLoweringBase::LegalizeTypeAction
LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
  // Checks are evaluated in this order on purpose: the element-count and
  // element-type queries are only reached for non-scalable vectors.
  if (VT.isScalableVector() || VT.getVectorNumElements() == 1 ||
      VT.getVectorElementType() == MVT::i1)
    return TargetLoweringBase::getPreferredVectorAction(VT);

  return TypeWidenVector;
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ class LoongArchTargetLowering : public TargetLowering {
281281
Align &PrefAlign) const override;
282282

283283
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const;
284+
/// Override of the vector legalization hook. The definition returns
/// TypeWidenVector for non-scalable vectors whose element count is not one
/// and whose element type is not i1; all other types are forwarded to
/// TargetLoweringBase::getPreferredVectorAction.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
284285

285286
private:
286287
/// Target-specific function used to lower LoongArch calling conventions.
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
3+
4+
5+
define void @load_sext_2i8_to_2i64(ptr%ptr, ptr%dst) {
6+
; CHECK-LABEL: load_sext_2i8_to_2i64:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: ld.h $a0, $a0, 0
9+
; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI0_0)
10+
; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI0_0)
11+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
12+
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
13+
; CHECK-NEXT: vslli.d $vr0, $vr0, 56
14+
; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
15+
; CHECK-NEXT: vst $vr0, $a1, 0
16+
; CHECK-NEXT: ret
17+
entry:
18+
%A = load <2 x i8>, ptr %ptr
19+
%B = sext <2 x i8> %A to <2 x i64>
20+
store <2 x i64> %B, ptr %dst
21+
ret void
22+
}
23+
24+
define void @load_sext_4i8_to_4i32(ptr%ptr, ptr%dst) {
25+
; CHECK-LABEL: load_sext_4i8_to_4i32:
26+
; CHECK: # %bb.0: # %entry
27+
; CHECK-NEXT: ld.w $a0, $a0, 0
28+
; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0)
29+
; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI1_0)
30+
; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
31+
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
32+
; CHECK-NEXT: vslli.w $vr0, $vr0, 24
33+
; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
34+
; CHECK-NEXT: vst $vr0, $a1, 0
35+
; CHECK-NEXT: ret
36+
entry:
37+
%A = load <4 x i8>, ptr %ptr
38+
%B = sext <4 x i8> %A to <4 x i32>
39+
store <4 x i32> %B, ptr %dst
40+
ret void
41+
}
42+
43+
define void @load_sext_8i8_to_8i16(ptr%ptr, ptr%dst) {
44+
; CHECK-LABEL: load_sext_8i8_to_8i16:
45+
; CHECK: # %bb.0: # %entry
46+
; CHECK-NEXT: ld.d $a0, $a0, 0
47+
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
48+
; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
49+
; CHECK-NEXT: vslli.h $vr0, $vr0, 8
50+
; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
51+
; CHECK-NEXT: vst $vr0, $a1, 0
52+
; CHECK-NEXT: ret
53+
entry:
54+
%A = load <8 x i8>, ptr %ptr
55+
%B = sext <8 x i8> %A to <8 x i16>
56+
store <8 x i16> %B, ptr %dst
57+
ret void
58+
}
59+
60+
define void @load_sext_2i16_to_2i64(ptr%ptr, ptr%dst) {
61+
; CHECK-LABEL: load_sext_2i16_to_2i64:
62+
; CHECK: # %bb.0: # %entry
63+
; CHECK-NEXT: ld.w $a0, $a0, 0
64+
; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
65+
; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI3_0)
66+
; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
67+
; CHECK-NEXT: vshuf.h $vr0, $vr0, $vr1
68+
; CHECK-NEXT: vslli.d $vr0, $vr0, 48
69+
; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
70+
; CHECK-NEXT: vst $vr0, $a1, 0
71+
; CHECK-NEXT: ret
72+
entry:
73+
%A = load <2 x i16>, ptr %ptr
74+
%B = sext <2 x i16> %A to <2 x i64>
75+
store <2 x i64> %B, ptr %dst
76+
ret void
77+
}
78+
79+
define void @load_sext_4i16_to_4i32(ptr%ptr, ptr%dst) {
80+
; CHECK-LABEL: load_sext_4i16_to_4i32:
81+
; CHECK: # %bb.0: # %entry
82+
; CHECK-NEXT: ld.d $a0, $a0, 0
83+
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
84+
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
85+
; CHECK-NEXT: vslli.w $vr0, $vr0, 16
86+
; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
87+
; CHECK-NEXT: vst $vr0, $a1, 0
88+
; CHECK-NEXT: ret
89+
entry:
90+
%A = load <4 x i16>, ptr %ptr
91+
%B = sext <4 x i16> %A to <4 x i32>
92+
store <4 x i32> %B, ptr %dst
93+
ret void
94+
}
95+
96+
define void @load_sext_2i32_to_2i64(ptr%ptr, ptr%dst) {
97+
; CHECK-LABEL: load_sext_2i32_to_2i64:
98+
; CHECK: # %bb.0: # %entry
99+
; CHECK-NEXT: ld.d $a0, $a0, 0
100+
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
101+
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
102+
; CHECK-NEXT: vslli.d $vr0, $vr0, 32
103+
; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
104+
; CHECK-NEXT: vst $vr0, $a1, 0
105+
; CHECK-NEXT: ret
106+
entry:
107+
%A = load <2 x i32>, ptr %ptr
108+
%B = sext <2 x i32> %A to <2 x i64>
109+
store <2 x i64> %B, ptr %dst
110+
ret void
111+
}
112+
113+
define void @load_sext_16i8_to_16i16(ptr%ptr, ptr%dst) {
114+
; CHECK-LABEL: load_sext_16i8_to_16i16:
115+
; CHECK: # %bb.0: # %entry
116+
; CHECK-NEXT: vld $vr0, $a0, 0
117+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
118+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0)
119+
; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1
120+
; CHECK-NEXT: vilvl.b $vr1, $vr1, $vr1
121+
; CHECK-NEXT: vslli.h $vr1, $vr1, 8
122+
; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
123+
; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
124+
; CHECK-NEXT: vslli.h $vr0, $vr0, 8
125+
; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
126+
; CHECK-NEXT: vst $vr0, $a1, 0
127+
; CHECK-NEXT: vst $vr1, $a1, 16
128+
; CHECK-NEXT: ret
129+
entry:
130+
%A = load <16 x i8>, ptr %ptr
131+
%B = sext <16 x i8> %A to <16 x i16>
132+
store <16 x i16> %B, ptr %dst
133+
ret void
134+
}
135+
136+
define void @load_sext_16i8_to_16i32(ptr%ptr, ptr%dst) {
137+
; CHECK-LABEL: load_sext_16i8_to_16i32:
138+
; CHECK: # %bb.0: # %entry
139+
; CHECK-NEXT: vld $vr0, $a0, 0
140+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
141+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0)
142+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_1)
143+
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_1)
144+
; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1
145+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_2)
146+
; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI7_2)
147+
; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr1, $vr2
148+
; CHECK-NEXT: vslli.w $vr1, $vr1, 24
149+
; CHECK-NEXT: vsrai.w $vr1, $vr1, 24
150+
; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3
151+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_3)
152+
; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI7_3)
153+
; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr3, $vr2
154+
; CHECK-NEXT: vslli.w $vr3, $vr3, 24
155+
; CHECK-NEXT: vsrai.w $vr3, $vr3, 24
156+
; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr0, $vr4
157+
; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr4, $vr2
158+
; CHECK-NEXT: vslli.w $vr4, $vr4, 24
159+
; CHECK-NEXT: vsrai.w $vr4, $vr4, 24
160+
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr2
161+
; CHECK-NEXT: vslli.w $vr0, $vr0, 24
162+
; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
163+
; CHECK-NEXT: vst $vr0, $a1, 0
164+
; CHECK-NEXT: vst $vr4, $a1, 48
165+
; CHECK-NEXT: vst $vr3, $a1, 32
166+
; CHECK-NEXT: vst $vr1, $a1, 16
167+
; CHECK-NEXT: ret
168+
entry:
169+
%A = load <16 x i8>, ptr %ptr
170+
%B = sext <16 x i8> %A to <16 x i32>
171+
store <16 x i32> %B, ptr %dst
172+
ret void
173+
}
174+
175+
define void @load_sext_16i8_to_16i64(ptr%ptr, ptr%dst) {
176+
; CHECK-LABEL: load_sext_16i8_to_16i64:
177+
; CHECK: # %bb.0: # %entry
178+
; CHECK-NEXT: vld $vr0, $a0, 0
179+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
180+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0)
181+
; CHECK-NEXT: vshuf4i.b $vr2, $vr0, 14
182+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1)
183+
; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI8_1)
184+
; CHECK-NEXT: vshuf.b $vr2, $vr0, $vr2, $vr1
185+
; CHECK-NEXT: vslli.d $vr2, $vr2, 56
186+
; CHECK-NEXT: vsrai.d $vr2, $vr2, 56
187+
; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3
188+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_2)
189+
; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI8_2)
190+
; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr3, $vr1
191+
; CHECK-NEXT: vslli.d $vr3, $vr3, 56
192+
; CHECK-NEXT: vsrai.d $vr3, $vr3, 56
193+
; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr0, $vr4
194+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_3)
195+
; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI8_3)
196+
; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr4, $vr1
197+
; CHECK-NEXT: vslli.d $vr4, $vr4, 56
198+
; CHECK-NEXT: vsrai.d $vr4, $vr4, 56
199+
; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr0, $vr5
200+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_4)
201+
; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI8_4)
202+
; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr5, $vr1
203+
; CHECK-NEXT: vslli.d $vr5, $vr5, 56
204+
; CHECK-NEXT: vsrai.d $vr5, $vr5, 56
205+
; CHECK-NEXT: vshuf.b $vr6, $vr0, $vr0, $vr6
206+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_5)
207+
; CHECK-NEXT: vld $vr7, $a0, %pc_lo12(.LCPI8_5)
208+
; CHECK-NEXT: vshuf.b $vr6, $vr0, $vr6, $vr1
209+
; CHECK-NEXT: vslli.d $vr6, $vr6, 56
210+
; CHECK-NEXT: vsrai.d $vr6, $vr6, 56
211+
; CHECK-NEXT: vshuf.b $vr7, $vr0, $vr0, $vr7
212+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_6)
213+
; CHECK-NEXT: vld $vr8, $a0, %pc_lo12(.LCPI8_6)
214+
; CHECK-NEXT: vshuf.b $vr7, $vr0, $vr7, $vr1
215+
; CHECK-NEXT: vslli.d $vr7, $vr7, 56
216+
; CHECK-NEXT: vsrai.d $vr7, $vr7, 56
217+
; CHECK-NEXT: vshuf.b $vr8, $vr0, $vr0, $vr8
218+
; CHECK-NEXT: vshuf.b $vr8, $vr0, $vr8, $vr1
219+
; CHECK-NEXT: vslli.d $vr8, $vr8, 56
220+
; CHECK-NEXT: vsrai.d $vr8, $vr8, 56
221+
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
222+
; CHECK-NEXT: vslli.d $vr0, $vr0, 56
223+
; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
224+
; CHECK-NEXT: vst $vr0, $a1, 0
225+
; CHECK-NEXT: vst $vr8, $a1, 112
226+
; CHECK-NEXT: vst $vr7, $a1, 96
227+
; CHECK-NEXT: vst $vr6, $a1, 80
228+
; CHECK-NEXT: vst $vr5, $a1, 64
229+
; CHECK-NEXT: vst $vr4, $a1, 48
230+
; CHECK-NEXT: vst $vr3, $a1, 32
231+
; CHECK-NEXT: vst $vr2, $a1, 16
232+
; CHECK-NEXT: ret
233+
entry:
234+
%A = load <16 x i8>, ptr %ptr
235+
%B = sext <16 x i8> %A to <16 x i64>
236+
store <16 x i64> %B, ptr %dst
237+
ret void
238+
}
239+
240+
define void @load_sext_8i16_to_8i32(ptr%ptr, ptr%dst) {
241+
; CHECK-LABEL: load_sext_8i16_to_8i32:
242+
; CHECK: # %bb.0: # %entry
243+
; CHECK-NEXT: vld $vr0, $a0, 0
244+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
245+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI9_0)
246+
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
247+
; CHECK-NEXT: vilvl.h $vr1, $vr1, $vr1
248+
; CHECK-NEXT: vslli.w $vr1, $vr1, 16
249+
; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
250+
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
251+
; CHECK-NEXT: vslli.w $vr0, $vr0, 16
252+
; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
253+
; CHECK-NEXT: vst $vr0, $a1, 0
254+
; CHECK-NEXT: vst $vr1, $a1, 16
255+
; CHECK-NEXT: ret
256+
entry:
257+
%A = load <8 x i16>, ptr %ptr
258+
%B = sext <8 x i16> %A to <8 x i32>
259+
store <8 x i32> %B, ptr %dst
260+
ret void
261+
}
262+
263+
define void @load_sext_8i16_to_8i64(ptr%ptr, ptr%dst) {
264+
; CHECK-LABEL: load_sext_8i16_to_8i64:
265+
; CHECK: # %bb.0: # %entry
266+
; CHECK-NEXT: vld $vr0, $a0, 0
267+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
268+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0)
269+
; CHECK-NEXT: vshuf4i.h $vr2, $vr0, 14
270+
; CHECK-NEXT: vori.b $vr3, $vr1, 0
271+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1)
272+
; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI10_1)
273+
; CHECK-NEXT: vshuf.h $vr3, $vr0, $vr2
274+
; CHECK-NEXT: vslli.d $vr2, $vr3, 48
275+
; CHECK-NEXT: vsrai.d $vr2, $vr2, 48
276+
; CHECK-NEXT: vshuf.h $vr4, $vr0, $vr0
277+
; CHECK-NEXT: vori.b $vr3, $vr1, 0
278+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_2)
279+
; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI10_2)
280+
; CHECK-NEXT: vshuf.h $vr3, $vr0, $vr4
281+
; CHECK-NEXT: vslli.d $vr3, $vr3, 48
282+
; CHECK-NEXT: vsrai.d $vr3, $vr3, 48
283+
; CHECK-NEXT: vshuf.h $vr5, $vr0, $vr0
284+
; CHECK-NEXT: vori.b $vr4, $vr1, 0
285+
; CHECK-NEXT: vshuf.h $vr4, $vr0, $vr5
286+
; CHECK-NEXT: vslli.d $vr4, $vr4, 48
287+
; CHECK-NEXT: vsrai.d $vr4, $vr4, 48
288+
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
289+
; CHECK-NEXT: vslli.d $vr0, $vr1, 48
290+
; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
291+
; CHECK-NEXT: vst $vr0, $a1, 0
292+
; CHECK-NEXT: vst $vr4, $a1, 48
293+
; CHECK-NEXT: vst $vr3, $a1, 32
294+
; CHECK-NEXT: vst $vr2, $a1, 16
295+
; CHECK-NEXT: ret
296+
entry:
297+
%A = load <8 x i16>, ptr %ptr
298+
%B = sext <8 x i16> %A to <8 x i64>
299+
store <8 x i64> %B, ptr %dst
300+
ret void
301+
}
302+
303+
define void @load_sext_4i32_to_4i64(ptr%ptr, ptr%dst) {
304+
; CHECK-LABEL: load_sext_4i32_to_4i64:
305+
; CHECK: # %bb.0: # %entry
306+
; CHECK-NEXT: vld $vr0, $a0, 0
307+
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
308+
; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 16
309+
; CHECK-NEXT: vslli.d $vr1, $vr1, 32
310+
; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
311+
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
312+
; CHECK-NEXT: vslli.d $vr0, $vr0, 32
313+
; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
314+
; CHECK-NEXT: vst $vr0, $a1, 0
315+
; CHECK-NEXT: vst $vr1, $a1, 16
316+
; CHECK-NEXT: ret
317+
entry:
318+
%A = load <4 x i32>, ptr %ptr
319+
%B = sext <4 x i32> %A to <4 x i64>
320+
store <4 x i64> %B, ptr %dst
321+
ret void
322+
}

0 commit comments

Comments
 (0)