Skip to content

Commit ac5a5a9

Browse files
committed
[PowerPC] Add default handling for single element vectors, and split/promote vNi1 vectors.
This patch updates the handling of vectors in getPreferredVectorAction(): For single-element and scalable vectors, fall back to the default vector legalization handling. For vNi1 vectors, add handling to either split or promote them in order to prevent the production of wide v256i1/v512i1 types.

The following assertion is fixed by this patch; prior to this fix, the backend ended up producing the wide vector types (which are used for MMA):

```
Assertion failed: VT.getSizeInBits() == Operand.getValueSizeInBits() && "Cannot BITCAST between types of different sizes!"
```

Differential Revision: https://reviews.llvm.org/D119521
1 parent 0135aa7 commit ac5a5a9

File tree

2 files changed

+225
-2
lines changed

2 files changed

+225
-2
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -765,8 +765,19 @@ namespace llvm {
765765
/// then the VPERM for the shuffle. All in all a very slow sequence.
766766
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
767767
const override {
768-
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
769-
VT.getScalarSizeInBits() % 8 == 0)
768+
// Default handling for scalable and single-element vectors.
769+
if (VT.isScalableVector() || VT.getVectorNumElements() == 1)
770+
return TargetLoweringBase::getPreferredVectorAction(VT);
771+
772+
// Split and promote vNi1 vectors so we don't produce v256i1/v512i1
773+
// types as those are only for MMA instructions.
774+
if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)
775+
return TypeSplitVector;
776+
if (VT.getScalarSizeInBits() == 1)
777+
return TypePromoteInteger;
778+
779+
// Widen vectors that have reasonably sized elements.
780+
if (VT.getScalarSizeInBits() % 8 == 0)
770781
return TypeWidenVector;
771782
return TargetLoweringBase::getPreferredVectorAction(VT);
772783
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3+
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
4+
; RUN: < %s | FileCheck %s
5+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -vec-extabi \
6+
; RUN: -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-AIX
7+
8+
define i32 @SplitPromoteVectorTest(i32 %Opc) align 2 {
9+
; CHECK-LABEL: SplitPromoteVectorTest:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1
12+
; CHECK-NEXT: mtvsrws v2, r3
13+
; CHECK-NEXT: li r5, 4
14+
; CHECK-NEXT: li r8, 0
15+
; CHECK-NEXT: vcmpequw v3, v2, v3
16+
; CHECK-NEXT: vextubrx r6, r5, v3
17+
; CHECK-NEXT: vextubrx r4, r8, v3
18+
; CHECK-NEXT: rlwimi r4, r6, 1, 30, 30
19+
; CHECK-NEXT: li r6, 8
20+
; CHECK-NEXT: vextubrx r7, r6, v3
21+
; CHECK-NEXT: rlwimi r4, r7, 2, 29, 29
22+
; CHECK-NEXT: li r7, 12
23+
; CHECK-NEXT: vextubrx r9, r7, v3
24+
; CHECK-NEXT: plxv v3, .LCPI0_1@PCREL(0), 1
25+
; CHECK-NEXT: rlwimi r4, r9, 3, 28, 28
26+
; CHECK-NEXT: vcmpequw v3, v2, v3
27+
; CHECK-NEXT: vextubrx r9, r8, v3
28+
; CHECK-NEXT: rlwimi r4, r9, 4, 27, 27
29+
; CHECK-NEXT: vextubrx r9, r5, v3
30+
; CHECK-NEXT: rlwimi r4, r9, 5, 26, 26
31+
; CHECK-NEXT: vextubrx r9, r6, v3
32+
; CHECK-NEXT: rlwimi r4, r9, 6, 25, 25
33+
; CHECK-NEXT: vextubrx r9, r7, v3
34+
; CHECK-NEXT: plxv v3, .LCPI0_2@PCREL(0), 1
35+
; CHECK-NEXT: rlwimi r4, r9, 7, 24, 24
36+
; CHECK-NEXT: vcmpequw v3, v2, v3
37+
; CHECK-NEXT: vextubrx r9, r8, v3
38+
; CHECK-NEXT: rlwimi r4, r9, 8, 23, 23
39+
; CHECK-NEXT: vextubrx r9, r5, v3
40+
; CHECK-NEXT: rlwimi r4, r9, 9, 22, 22
41+
; CHECK-NEXT: vextubrx r9, r6, v3
42+
; CHECK-NEXT: rlwimi r4, r9, 10, 21, 21
43+
; CHECK-NEXT: vextubrx r9, r7, v3
44+
; CHECK-NEXT: plxv v3, .LCPI0_3@PCREL(0), 1
45+
; CHECK-NEXT: rlwimi r4, r9, 11, 20, 20
46+
; CHECK-NEXT: vcmpequw v3, v2, v3
47+
; CHECK-NEXT: vextubrx r9, r8, v3
48+
; CHECK-NEXT: rlwimi r4, r9, 12, 19, 19
49+
; CHECK-NEXT: vextubrx r9, r5, v3
50+
; CHECK-NEXT: rlwimi r4, r9, 13, 18, 18
51+
; CHECK-NEXT: vextubrx r9, r6, v3
52+
; CHECK-NEXT: rlwimi r4, r9, 14, 17, 17
53+
; CHECK-NEXT: vextubrx r9, r7, v3
54+
; CHECK-NEXT: plxv v3, .LCPI0_4@PCREL(0), 1
55+
; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16
56+
; CHECK-NEXT: vcmpequw v3, v2, v3
57+
; CHECK-NEXT: vextubrx r10, r5, v3
58+
; CHECK-NEXT: vextubrx r9, r8, v3
59+
; CHECK-NEXT: rlwimi r9, r10, 1, 30, 30
60+
; CHECK-NEXT: vextubrx r10, r6, v3
61+
; CHECK-NEXT: rlwimi r9, r10, 2, 29, 29
62+
; CHECK-NEXT: vextubrx r10, r7, v3
63+
; CHECK-NEXT: plxv v3, .LCPI0_5@PCREL(0), 1
64+
; CHECK-NEXT: rlwimi r9, r10, 3, 28, 28
65+
; CHECK-NEXT: vcmpequw v3, v2, v3
66+
; CHECK-NEXT: vextubrx r10, r8, v3
67+
; CHECK-NEXT: rlwimi r9, r10, 4, 27, 27
68+
; CHECK-NEXT: vextubrx r10, r5, v3
69+
; CHECK-NEXT: rlwimi r9, r10, 5, 26, 26
70+
; CHECK-NEXT: vextubrx r10, r6, v3
71+
; CHECK-NEXT: rlwimi r9, r10, 6, 25, 25
72+
; CHECK-NEXT: vextubrx r10, r7, v3
73+
; CHECK-NEXT: plxv v3, .LCPI0_6@PCREL(0), 1
74+
; CHECK-NEXT: rlwimi r9, r10, 7, 24, 24
75+
; CHECK-NEXT: vcmpequw v3, v2, v3
76+
; CHECK-NEXT: vextubrx r10, r8, v3
77+
; CHECK-NEXT: rlwimi r9, r10, 8, 23, 23
78+
; CHECK-NEXT: vextubrx r10, r5, v3
79+
; CHECK-NEXT: rlwimi r9, r10, 9, 22, 22
80+
; CHECK-NEXT: vextubrx r10, r6, v3
81+
; CHECK-NEXT: rlwimi r9, r10, 10, 21, 21
82+
; CHECK-NEXT: vextubrx r10, r7, v3
83+
; CHECK-NEXT: plxv v3, .LCPI0_7@PCREL(0), 1
84+
; CHECK-NEXT: rlwimi r9, r10, 11, 20, 20
85+
; CHECK-NEXT: vcmpequw v2, v2, v3
86+
; CHECK-NEXT: vextubrx r8, r8, v2
87+
; CHECK-NEXT: vextubrx r5, r5, v2
88+
; CHECK-NEXT: rlwimi r9, r8, 12, 19, 19
89+
; CHECK-NEXT: rlwimi r9, r5, 13, 18, 18
90+
; CHECK-NEXT: vextubrx r5, r6, v2
91+
; CHECK-NEXT: rlwimi r9, r5, 14, 17, 17
92+
; CHECK-NEXT: vextubrx r5, r7, v2
93+
; CHECK-NEXT: rlwimi r9, r5, 15, 0, 16
94+
; CHECK-NEXT: or r4, r9, r4
95+
; CHECK-NEXT: andi. r4, r4, 65535
96+
; CHECK-NEXT: iseleq r3, 0, r3
97+
; CHECK-NEXT: blr
98+
;
99+
; CHECK-AIX-LABEL: SplitPromoteVectorTest:
100+
; CHECK-AIX: # %bb.0: # %entry
101+
; CHECK-AIX-NEXT: ld 4, L..C0(2) # %const.0
102+
; CHECK-AIX-NEXT: mtvsrws 34, 3
103+
; CHECK-AIX-NEXT: li 8, 15
104+
; CHECK-AIX-NEXT: li 5, 11
105+
; CHECK-AIX-NEXT: lxv 35, 0(4)
106+
; CHECK-AIX-NEXT: vcmpequw 3, 2, 3
107+
; CHECK-AIX-NEXT: vextublx 4, 8, 3
108+
; CHECK-AIX-NEXT: vextublx 6, 5, 3
109+
; CHECK-AIX-NEXT: clrlwi 4, 4, 31
110+
; CHECK-AIX-NEXT: rlwimi 4, 6, 1, 30, 30
111+
; CHECK-AIX-NEXT: li 6, 7
112+
; CHECK-AIX-NEXT: vextublx 7, 6, 3
113+
; CHECK-AIX-NEXT: rlwimi 4, 7, 2, 29, 29
114+
; CHECK-AIX-NEXT: li 7, 3
115+
; CHECK-AIX-NEXT: vextublx 9, 7, 3
116+
; CHECK-AIX-NEXT: rlwimi 4, 9, 3, 28, 28
117+
; CHECK-AIX-NEXT: ld 9, L..C1(2) # %const.1
118+
; CHECK-AIX-NEXT: lxv 35, 0(9)
119+
; CHECK-AIX-NEXT: vcmpequw 3, 2, 3
120+
; CHECK-AIX-NEXT: vextublx 9, 8, 3
121+
; CHECK-AIX-NEXT: rlwimi 4, 9, 4, 27, 27
122+
; CHECK-AIX-NEXT: vextublx 9, 5, 3
123+
; CHECK-AIX-NEXT: rlwimi 4, 9, 5, 26, 26
124+
; CHECK-AIX-NEXT: vextublx 9, 6, 3
125+
; CHECK-AIX-NEXT: rlwimi 4, 9, 6, 25, 25
126+
; CHECK-AIX-NEXT: vextublx 9, 7, 3
127+
; CHECK-AIX-NEXT: rlwimi 4, 9, 7, 24, 24
128+
; CHECK-AIX-NEXT: ld 9, L..C2(2) # %const.2
129+
; CHECK-AIX-NEXT: lxv 35, 0(9)
130+
; CHECK-AIX-NEXT: vcmpequw 3, 2, 3
131+
; CHECK-AIX-NEXT: vextublx 9, 8, 3
132+
; CHECK-AIX-NEXT: rlwimi 4, 9, 8, 23, 23
133+
; CHECK-AIX-NEXT: vextublx 9, 5, 3
134+
; CHECK-AIX-NEXT: rlwimi 4, 9, 9, 22, 22
135+
; CHECK-AIX-NEXT: vextublx 9, 6, 3
136+
; CHECK-AIX-NEXT: rlwimi 4, 9, 10, 21, 21
137+
; CHECK-AIX-NEXT: vextublx 9, 7, 3
138+
; CHECK-AIX-NEXT: rlwimi 4, 9, 11, 20, 20
139+
; CHECK-AIX-NEXT: ld 9, L..C3(2) # %const.3
140+
; CHECK-AIX-NEXT: lxv 35, 0(9)
141+
; CHECK-AIX-NEXT: vcmpequw 3, 2, 3
142+
; CHECK-AIX-NEXT: vextublx 9, 8, 3
143+
; CHECK-AIX-NEXT: rlwimi 4, 9, 12, 19, 19
144+
; CHECK-AIX-NEXT: vextublx 9, 5, 3
145+
; CHECK-AIX-NEXT: rlwimi 4, 9, 13, 18, 18
146+
; CHECK-AIX-NEXT: vextublx 9, 6, 3
147+
; CHECK-AIX-NEXT: rlwimi 4, 9, 14, 17, 17
148+
; CHECK-AIX-NEXT: vextublx 9, 7, 3
149+
; CHECK-AIX-NEXT: rlwimi 4, 9, 15, 16, 16
150+
; CHECK-AIX-NEXT: ld 9, L..C4(2) # %const.4
151+
; CHECK-AIX-NEXT: lxv 35, 0(9)
152+
; CHECK-AIX-NEXT: vcmpequw 3, 2, 3
153+
; CHECK-AIX-NEXT: vextublx 9, 8, 3
154+
; CHECK-AIX-NEXT: vextublx 10, 5, 3
155+
; CHECK-AIX-NEXT: clrlwi 9, 9, 31
156+
; CHECK-AIX-NEXT: rlwimi 9, 10, 1, 30, 30
157+
; CHECK-AIX-NEXT: vextublx 10, 6, 3
158+
; CHECK-AIX-NEXT: rlwimi 9, 10, 2, 29, 29
159+
; CHECK-AIX-NEXT: vextublx 10, 7, 3
160+
; CHECK-AIX-NEXT: rlwimi 9, 10, 3, 28, 28
161+
; CHECK-AIX-NEXT: ld 10, L..C5(2) # %const.5
162+
; CHECK-AIX-NEXT: lxv 35, 0(10)
163+
; CHECK-AIX-NEXT: vcmpequw 3, 2, 3
164+
; CHECK-AIX-NEXT: vextublx 10, 8, 3
165+
; CHECK-AIX-NEXT: rlwimi 9, 10, 4, 27, 27
166+
; CHECK-AIX-NEXT: vextublx 10, 5, 3
167+
; CHECK-AIX-NEXT: rlwimi 9, 10, 5, 26, 26
168+
; CHECK-AIX-NEXT: vextublx 10, 6, 3
169+
; CHECK-AIX-NEXT: rlwimi 9, 10, 6, 25, 25
170+
; CHECK-AIX-NEXT: vextublx 10, 7, 3
171+
; CHECK-AIX-NEXT: rlwimi 9, 10, 7, 24, 24
172+
; CHECK-AIX-NEXT: ld 10, L..C6(2) # %const.6
173+
; CHECK-AIX-NEXT: lxv 35, 0(10)
174+
; CHECK-AIX-NEXT: vcmpequw 3, 2, 3
175+
; CHECK-AIX-NEXT: vextublx 10, 8, 3
176+
; CHECK-AIX-NEXT: rlwimi 9, 10, 8, 23, 23
177+
; CHECK-AIX-NEXT: vextublx 10, 5, 3
178+
; CHECK-AIX-NEXT: rlwimi 9, 10, 9, 22, 22
179+
; CHECK-AIX-NEXT: vextublx 10, 6, 3
180+
; CHECK-AIX-NEXT: rlwimi 9, 10, 10, 21, 21
181+
; CHECK-AIX-NEXT: vextublx 10, 7, 3
182+
; CHECK-AIX-NEXT: rlwimi 9, 10, 11, 20, 20
183+
; CHECK-AIX-NEXT: ld 10, L..C7(2) # %const.7
184+
; CHECK-AIX-NEXT: lxv 35, 0(10)
185+
; CHECK-AIX-NEXT: vcmpequw 2, 2, 3
186+
; CHECK-AIX-NEXT: vextublx 8, 8, 2
187+
; CHECK-AIX-NEXT: vextublx 5, 5, 2
188+
; CHECK-AIX-NEXT: rlwimi 9, 8, 12, 19, 19
189+
; CHECK-AIX-NEXT: rlwimi 9, 5, 13, 18, 18
190+
; CHECK-AIX-NEXT: vextublx 5, 6, 2
191+
; CHECK-AIX-NEXT: rlwimi 9, 5, 14, 17, 17
192+
; CHECK-AIX-NEXT: vextublx 5, 7, 2
193+
; CHECK-AIX-NEXT: rlwimi 9, 5, 15, 16, 16
194+
; CHECK-AIX-NEXT: or 4, 9, 4
195+
; CHECK-AIX-NEXT: andi. 4, 4, 65535
196+
; CHECK-AIX-NEXT: iseleq 3, 0, 3
197+
; CHECK-AIX-NEXT: blr
198+
entry:
199+
%0 = insertelement <32 x i32> poison, i32 %Opc, i64 0
200+
%shuffle = shufflevector <32 x i32> %0, <32 x i32> poison, <32 x i32> zeroinitializer
201+
%1 = icmp eq <32 x i32> %shuffle, <i32 991, i32 888, i32 963, i32 906, i32 944, i32 915, i32 895, i32 952, i32 892, i32 949, i32 974, i32 879, i32 874, i32 943, i32 962, i32 905, i32 914, i32 951, i32 948, i32 894, i32 891, i32 973, i32 878, i32 989, i32 886, i32 987, i32 884, i32 961, i32 904, i32 942, i32 913, i32 893>
202+
%2 = bitcast <32 x i1> %1 to i32
203+
%3 = icmp ne i32 %2, 0
204+
%op.rdx = or i1 %3, false
205+
%op.rdx255 = or i1 %op.rdx, false
206+
%4 = or i1 %op.rdx255, false
207+
%5 = or i1 %4, false
208+
%6 = or i1 %5, false
209+
%7 = or i1 %6, false
210+
%cond = select i1 %7, i32 %Opc, i32 0
211+
ret i32 %cond
212+
}

0 commit comments

Comments
 (0)