Skip to content

Commit 9f2c394

Browse files
committed
[AArch64] Add tests with 2 x tbl2 for v8i8 and nonconst masks.
Extra tests for D133491.
1 parent 989f76c commit 9f2c394

File tree

1 file changed

+137
-18
lines changed

1 file changed

+137
-18
lines changed

llvm/test/CodeGen/AArch64/arm64-tbl.ll

Lines changed: 137 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,36 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
9696
; CHECK-NEXT: .byte 4 // 0x4
9797
; CHECK-NEXT: .byte 8 // 0x8
9898
; CHECK-NEXT: .byte 12 // 0xc
99+
; CHECK-NEXT: .byte 255 // 0xff
100+
; CHECK-NEXT: .byte 255 // 0xff
101+
; CHECK-NEXT: .byte 255 // 0xff
102+
; CHECK-NEXT: .byte 255 // 0xff
103+
104+
; Two tbl2 lookups returning <8 x i8>, both using the same constant mask
; <0, 4, 8, 12, -1, -1, -1, -1>, combined by a shufflevector that keeps
; lanes 0-3 of the first result and lanes 4-7 of the second.
; The expected output below performs two separate tbl.8b lookups, each with
; the mask loaded from the constant pool, and merges them with a lane move.
define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
105+
; CHECK-LABEL: shuffled_tbl2_to_tbl4_v8i8:
106+
; CHECK: // %bb.0:
107+
; CHECK-NEXT: adrp x8, .LCPI8_0
108+
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
109+
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
110+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
111+
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
112+
; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
113+
; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v4
114+
; CHECK-NEXT: tbl.8b v1, { v2, v3 }, v4
115+
; CHECK-NEXT: mov.s v0[1], v1[1]
116+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
117+
; CHECK-NEXT: ret
118+
%t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
119+
%t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
120+
; Shuffle indices 0-3 select lanes 0-3 of %t1; indices 12-15 select lanes 4-7
; of %t2 (indices 8 and above address the second operand).
%s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
121+
ret <8 x i8> %s
122+
}
123+
124+
; CHECK-LABEL: .LCPI9_0:
125+
; CHECK-NEXT: .byte 0 // 0x0
126+
; CHECK-NEXT: .byte 4 // 0x4
127+
; CHECK-NEXT: .byte 8 // 0x8
128+
; CHECK-NEXT: .byte 12 // 0xc
99129
; CHECK-NEXT: .byte 16 // 0x10
100130
; CHECK-NEXT: .byte 20 // 0x14
101131
; CHECK-NEXT: .byte 24 // 0x18
@@ -112,12 +142,12 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
112142
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
113143
; CHECK-LABEL: shuffled_tbl2_to_tbl4:
114144
; CHECK: // %bb.0:
115-
; CHECK-NEXT: adrp x8, .LCPI8_0
145+
; CHECK-NEXT: adrp x8, .LCPI9_0
116146
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
117147
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
118148
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
119149
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
120-
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI8_0]
150+
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
121151
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
122152
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v4
123153
; CHECK-NEXT: mov.d v0[1], v1[0]
@@ -128,19 +158,108 @@ define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c
128158
ret <16 x i8> %s
129159
}
130160

161+
; Two tbl2 lookups returning <16 x i8>, combined by a shufflevector that keeps
; lanes 0-7 of each result. The mask of the first lookup (%t1) is not a
; constant: it is built from the i8 argument %v. The second lookup (%t2) uses
; a constant mask.
; The expected output below materialises the variable mask in v4 (all-ones
; movi followed by eight mov.b inserts of w0), loads the constant mask from the
; constant pool, performs two separate tbl.16b lookups and merges them with
; mov.d.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
162+
; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
163+
; CHECK: // %bb.0:
164+
; CHECK-NEXT: movi.2d v4, #0xffffffffffffffff
165+
; CHECK-NEXT: adrp x8, .LCPI10_0
166+
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
167+
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
168+
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
169+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
170+
; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI10_0]
171+
; CHECK-NEXT: mov.b v4[0], w0
172+
; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5
173+
; CHECK-NEXT: mov.b v4[1], w0
174+
; CHECK-NEXT: mov.b v4[2], w0
175+
; CHECK-NEXT: mov.b v4[3], w0
176+
; CHECK-NEXT: mov.b v4[4], w0
177+
; CHECK-NEXT: mov.b v4[5], w0
178+
; CHECK-NEXT: mov.b v4[6], w0
179+
; CHECK-NEXT: mov.b v4[7], w0
180+
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
181+
; CHECK-NEXT: mov.d v0[1], v2[0]
182+
; CHECK-NEXT: ret
183+
; Build the non-constant mask: lanes 0-7 hold %v, lanes 8-15 hold -1.
%ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
184+
%ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
185+
%ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
186+
%ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
187+
%ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
188+
%ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
189+
%ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
190+
%ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
191+
%ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
192+
%ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
193+
%ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
194+
%ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
195+
%ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
196+
%ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
197+
%ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
198+
%ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
199+
; First lookup uses the variable mask; second lookup uses a constant mask.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
200+
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
201+
; Shuffle indices 0-7 select lanes 0-7 of %t1; indices 16-23 select lanes 0-7
; of %t2 (indices 16 and above address the second operand).
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
202+
ret <16 x i8> %s
203+
}
204+
205+
; Two tbl2 lookups returning <16 x i8>, combined by a shufflevector that keeps
; lanes 0-7 of each result. Here the first lookup (%t1, on %c/%d) uses a
; constant mask and the second lookup (%t2, on %a/%b) uses a mask built from
; the i8 argument %v.
; The expected output below performs two separate tbl.16b lookups, inserts the
; low half of the variable-mask result into the upper half of the
; constant-mask result (mov.d v2[1], v0[0]) and copies that into v0.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
206+
; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
207+
; CHECK: // %bb.0:
208+
; CHECK-NEXT: movi.2d v4, #0xffffffffffffffff
209+
; CHECK-NEXT: adrp x8, .LCPI11_0
210+
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
211+
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
212+
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
213+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
214+
; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI11_0]
215+
; CHECK-NEXT: mov.b v4[0], w0
216+
; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5
217+
; CHECK-NEXT: mov.b v4[1], w0
218+
; CHECK-NEXT: mov.b v4[2], w0
219+
; CHECK-NEXT: mov.b v4[3], w0
220+
; CHECK-NEXT: mov.b v4[4], w0
221+
; CHECK-NEXT: mov.b v4[5], w0
222+
; CHECK-NEXT: mov.b v4[6], w0
223+
; CHECK-NEXT: mov.b v4[7], w0
224+
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
225+
; CHECK-NEXT: mov.d v2[1], v0[0]
226+
; CHECK-NEXT: mov.16b v0, v2
227+
; CHECK-NEXT: ret
228+
; Build the non-constant mask: lanes 0-7 hold %v, lanes 8-15 hold -1.
%ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
229+
%ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
230+
%ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
231+
%ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
232+
%ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
233+
%ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
234+
%ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
235+
%ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
236+
%ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
237+
%ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
238+
%ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
239+
%ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
240+
%ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
241+
%ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
242+
%ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
243+
%ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
244+
; First lookup uses a constant mask; second lookup uses the variable mask.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
245+
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
246+
; Shuffle indices 0-7 select lanes 0-7 of %t1; indices 16-23 select lanes 0-7
; of %t2 (indices 16 and above address the second operand).
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
247+
ret <16 x i8> %s
248+
}
249+
131250
define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
132251
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_shuffle:
133252
; CHECK: // %bb.0:
134-
; CHECK-NEXT: adrp x8, .LCPI9_0
253+
; CHECK-NEXT: adrp x8, .LCPI12_0
135254
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
136255
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
137256
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
138257
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
139-
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
140-
; CHECK-NEXT: adrp x8, .LCPI9_1
258+
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI12_0]
259+
; CHECK-NEXT: adrp x8, .LCPI12_1
141260
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
142261
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v4
143-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
262+
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_1]
144263
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
145264
; CHECK-NEXT: ret
146265
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
@@ -152,18 +271,18 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_shuffle(<16 x i8> %a, <16 x
152271
define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
153272
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1:
154273
; CHECK: // %bb.0:
155-
; CHECK-NEXT: adrp x8, .LCPI10_0
156-
; CHECK-NEXT: adrp x9, .LCPI10_1
274+
; CHECK-NEXT: adrp x8, .LCPI13_0
275+
; CHECK-NEXT: adrp x9, .LCPI13_1
157276
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
158277
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
159278
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
160279
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
161-
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0]
162-
; CHECK-NEXT: adrp x8, .LCPI10_2
163-
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI10_1]
280+
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_0]
281+
; CHECK-NEXT: adrp x8, .LCPI13_2
282+
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI13_1]
164283
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
165284
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v5
166-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_2]
285+
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_2]
167286
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
168287
; CHECK-NEXT: ret
169288
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
@@ -175,18 +294,18 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1(<16 x i8> %a, <1
175294
define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
176295
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2:
177296
; CHECK: // %bb.0:
178-
; CHECK-NEXT: adrp x8, .LCPI11_0
179-
; CHECK-NEXT: adrp x9, .LCPI11_1
297+
; CHECK-NEXT: adrp x8, .LCPI14_0
298+
; CHECK-NEXT: adrp x9, .LCPI14_1
180299
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
181300
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
182301
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
183302
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
184-
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI11_0]
185-
; CHECK-NEXT: adrp x8, .LCPI11_2
186-
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI11_1]
303+
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_0]
304+
; CHECK-NEXT: adrp x8, .LCPI14_2
305+
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI14_1]
187306
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
188307
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v5
189-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_2]
308+
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_2]
190309
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
191310
; CHECK-NEXT: ret
192311
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)

0 commit comments

Comments
 (0)