@@ -96,6 +96,36 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
96
96
; CHECK-NEXT: .byte 4 // 0x4
97
97
; CHECK-NEXT: .byte 8 // 0x8
98
98
; CHECK-NEXT: .byte 12 // 0xc
99
+ ; CHECK-NEXT: .byte 255 // 0xff
100
+ ; CHECK-NEXT: .byte 255 // 0xff
101
+ ; CHECK-NEXT: .byte 255 // 0xff
102
+ ; CHECK-NEXT: .byte 255 // 0xff
103
+
104
+ ; 64-bit (<8 x i8>) variant: two tbl2.v8i8 lookups that use the same constant
+ ; mask are joined by a shufflevector. The expected assembly below keeps two
+ ; separate tbl.8b instructions and combines their results with a single lane
+ ; move (mov.s) rather than one wider table lookup.
+ ; NOTE(review): presumably a negative test for the tbl2 -> tbl4 combine on
+ ; 64-bit vectors; confirm against the combine's implementation.
+ define <8 x i8 > @shuffled_tbl2_to_tbl4_v8i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d ) {
105
+ ; CHECK-LABEL: shuffled_tbl2_to_tbl4_v8i8:
106
+ ; CHECK: // %bb.0:
107
+ ; CHECK-NEXT: adrp x8, .LCPI8_0
108
+ ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
109
+ ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
110
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
111
+ ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
112
+ ; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
113
+ ; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v4
114
+ ; CHECK-NEXT: tbl.8b v1, { v2, v3 }, v4
115
+ ; CHECK-NEXT: mov.s v0[1], v1[1]
116
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
117
+ ; CHECK-NEXT: ret
118
+ ; Both lookups use the identical constant mask <0, 4, 8, 12, -1, -1, -1, -1>:
+ ; the first on the table pair (%a, %b), the second on (%c, %d).
+ %t1 = call <8 x i8 > @llvm.aarch64.neon.tbl2.v8i8 (<16 x i8 > %a , <16 x i8 > %b , <8 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
119
+ %t2 = call <8 x i8 > @llvm.aarch64.neon.tbl2.v8i8 (<16 x i8 > %c , <16 x i8 > %d , <8 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
120
+ ; Result lanes 0-3 are %t1 lanes 0-3; shuffle indices 12-15 address the second
+ ; operand, i.e. %t2 lanes 4-7 (the lanes whose mask entries are -1).
+ %s = shufflevector <8 x i8 > %t1 , <8 x i8 > %t2 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 12 , i32 13 , i32 14 , i32 15 >
121
+ ret <8 x i8 > %s
122
+ }
123
+
124
+ ; CHECK-LABEL: .LCPI9_0:
125
+ ; CHECK-NEXT: .byte 0 // 0x0
126
+ ; CHECK-NEXT: .byte 4 // 0x4
127
+ ; CHECK-NEXT: .byte 8 // 0x8
128
+ ; CHECK-NEXT: .byte 12 // 0xc
99
129
; CHECK-NEXT: .byte 16 // 0x10
100
130
; CHECK-NEXT: .byte 20 // 0x14
101
131
; CHECK-NEXT: .byte 24 // 0x18
@@ -112,12 +142,12 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
112
142
define <16 x i8 > @shuffled_tbl2_to_tbl4 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d ) {
113
143
; CHECK-LABEL: shuffled_tbl2_to_tbl4:
114
144
; CHECK: // %bb.0:
115
- ; CHECK-NEXT: adrp x8, .LCPI8_0
145
+ ; CHECK-NEXT: adrp x8, .LCPI9_0
116
146
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
117
147
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
118
148
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
119
149
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
120
- ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI8_0 ]
150
+ ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0 ]
121
151
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
122
152
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v4
123
153
; CHECK-NEXT: mov.d v0[1], v1[0]
@@ -128,19 +158,108 @@ define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c
128
158
ret <16 x i8 > %s
129
159
}
130
160
161
+ ; Two tbl2.v16i8 lookups joined by a shufflevector, where the mask of the first
+ ; lookup (%t1) is not a constant: it is assembled from the i8 argument %v. The
+ ; mask of the second lookup (%t2) is a constant. The expected assembly below
+ ; keeps two separate tbl.16b instructions and merges their low 64-bit halves
+ ; with mov.d.
+ ; NOTE(review): presumably checks that the tbl2 -> tbl4 combine is skipped when
+ ; the first mask is non-constant; confirm against the combine's implementation.
+ define <16 x i8 > @shuffled_tbl2_to_tbl4_nonconst_first_mask (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d , i8 %v ) {
162
+ ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
163
+ ; CHECK: // %bb.0:
164
+ ; CHECK-NEXT: movi.2d v4, #0xffffffffffffffff
165
+ ; CHECK-NEXT: adrp x8, .LCPI10_0
166
+ ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
167
+ ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
168
+ ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
169
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
170
+ ; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI10_0]
171
+ ; CHECK-NEXT: mov.b v4[0], w0
172
+ ; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5
173
+ ; CHECK-NEXT: mov.b v4[1], w0
174
+ ; CHECK-NEXT: mov.b v4[2], w0
175
+ ; CHECK-NEXT: mov.b v4[3], w0
176
+ ; CHECK-NEXT: mov.b v4[4], w0
177
+ ; CHECK-NEXT: mov.b v4[5], w0
178
+ ; CHECK-NEXT: mov.b v4[6], w0
179
+ ; CHECK-NEXT: mov.b v4[7], w0
180
+ ; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
181
+ ; CHECK-NEXT: mov.d v0[1], v2[0]
182
+ ; CHECK-NEXT: ret
183
+ ; Build the non-constant mask: lanes 0-7 are the argument %v, lanes 8-15 are -1.
+ %ins.0 = insertelement <16 x i8 > poison, i8 %v , i32 0
184
+ %ins.1 = insertelement <16 x i8 > %ins.0 , i8 %v , i32 1
185
+ %ins.2 = insertelement <16 x i8 > %ins.1 , i8 %v , i32 2
186
+ %ins.3 = insertelement <16 x i8 > %ins.2 , i8 %v , i32 3
187
+ %ins.4 = insertelement <16 x i8 > %ins.3 , i8 %v , i32 4
188
+ %ins.5 = insertelement <16 x i8 > %ins.4 , i8 %v , i32 5
189
+ %ins.6 = insertelement <16 x i8 > %ins.5 , i8 %v , i32 6
190
+ %ins.7 = insertelement <16 x i8 > %ins.6 , i8 %v , i32 7
191
+ %ins.8 = insertelement <16 x i8 > %ins.7 , i8 -1 , i32 8
192
+ %ins.9 = insertelement <16 x i8 > %ins.8 , i8 -1 , i32 9
193
+ %ins.10 = insertelement <16 x i8 > %ins.9 , i8 -1 , i32 10
194
+ %ins.11 = insertelement <16 x i8 > %ins.10 , i8 -1 , i32 11
195
+ %ins.12 = insertelement <16 x i8 > %ins.11 , i8 -1 , i32 12
196
+ %ins.13 = insertelement <16 x i8 > %ins.12 , i8 -1 , i32 13
197
+ %ins.14 = insertelement <16 x i8 > %ins.13 , i8 -1 , i32 14
198
+ %ins.15 = insertelement <16 x i8 > %ins.14 , i8 -1 , i32 15
199
+ ; First lookup: tables (%a, %b) with the non-constant mask %ins.15.
+ %t1 = call <16 x i8 > @llvm.aarch64.neon.tbl2.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %ins.15 )
200
+ ; Second lookup: tables (%c, %d) with a constant mask.
+ %t2 = call <16 x i8 > @llvm.aarch64.neon.tbl2.v16i8 (<16 x i8 > %c , <16 x i8 > %d , <16 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 16 , i8 20 , i8 24 , i8 28 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
201
+ ; Result lanes 0-7 are %t1 lanes 0-7; shuffle indices 16-23 address the second
+ ; operand, i.e. %t2 lanes 0-7.
+ %s = shufflevector <16 x i8 > %t1 , <16 x i8 > %t2 , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 >
202
+ ret <16 x i8 > %s
203
+ }
204
+
205
+ ; Two tbl2.v16i8 lookups joined by a shufflevector, where the mask of the
+ ; second lookup (%t2) is not a constant: it is assembled from the i8 argument
+ ; %v. The mask of the first lookup (%t1) is a constant. The expected assembly
+ ; below keeps two separate tbl.16b instructions, merges their low 64-bit halves
+ ; with mov.d, and copies the merged value into v0 with mov.16b.
+ ; NOTE(review): presumably checks that the tbl2 -> tbl4 combine is skipped when
+ ; the second mask is non-constant; confirm against the combine's implementation.
+ define <16 x i8 > @shuffled_tbl2_to_tbl4_nonconst_second_mask (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d , i8 %v ) {
206
+ ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
207
+ ; CHECK: // %bb.0:
208
+ ; CHECK-NEXT: movi.2d v4, #0xffffffffffffffff
209
+ ; CHECK-NEXT: adrp x8, .LCPI11_0
210
+ ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
211
+ ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
212
+ ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
213
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
214
+ ; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI11_0]
215
+ ; CHECK-NEXT: mov.b v4[0], w0
216
+ ; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5
217
+ ; CHECK-NEXT: mov.b v4[1], w0
218
+ ; CHECK-NEXT: mov.b v4[2], w0
219
+ ; CHECK-NEXT: mov.b v4[3], w0
220
+ ; CHECK-NEXT: mov.b v4[4], w0
221
+ ; CHECK-NEXT: mov.b v4[5], w0
222
+ ; CHECK-NEXT: mov.b v4[6], w0
223
+ ; CHECK-NEXT: mov.b v4[7], w0
224
+ ; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
225
+ ; CHECK-NEXT: mov.d v2[1], v0[0]
226
+ ; CHECK-NEXT: mov.16b v0, v2
227
+ ; CHECK-NEXT: ret
228
+ ; Build the non-constant mask: lanes 0-7 are the argument %v, lanes 8-15 are -1.
+ %ins.0 = insertelement <16 x i8 > poison, i8 %v , i32 0
229
+ %ins.1 = insertelement <16 x i8 > %ins.0 , i8 %v , i32 1
230
+ %ins.2 = insertelement <16 x i8 > %ins.1 , i8 %v , i32 2
231
+ %ins.3 = insertelement <16 x i8 > %ins.2 , i8 %v , i32 3
232
+ %ins.4 = insertelement <16 x i8 > %ins.3 , i8 %v , i32 4
233
+ %ins.5 = insertelement <16 x i8 > %ins.4 , i8 %v , i32 5
234
+ %ins.6 = insertelement <16 x i8 > %ins.5 , i8 %v , i32 6
235
+ %ins.7 = insertelement <16 x i8 > %ins.6 , i8 %v , i32 7
236
+ %ins.8 = insertelement <16 x i8 > %ins.7 , i8 -1 , i32 8
237
+ %ins.9 = insertelement <16 x i8 > %ins.8 , i8 -1 , i32 9
238
+ %ins.10 = insertelement <16 x i8 > %ins.9 , i8 -1 , i32 10
239
+ %ins.11 = insertelement <16 x i8 > %ins.10 , i8 -1 , i32 11
240
+ %ins.12 = insertelement <16 x i8 > %ins.11 , i8 -1 , i32 12
241
+ %ins.13 = insertelement <16 x i8 > %ins.12 , i8 -1 , i32 13
242
+ %ins.14 = insertelement <16 x i8 > %ins.13 , i8 -1 , i32 14
243
+ %ins.15 = insertelement <16 x i8 > %ins.14 , i8 -1 , i32 15
244
+ ; First lookup: tables (%c, %d) with a constant mask.
+ %t1 = call <16 x i8 > @llvm.aarch64.neon.tbl2.v16i8 (<16 x i8 > %c , <16 x i8 > %d , <16 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 16 , i8 20 , i8 24 , i8 28 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
245
+ ; Second lookup: tables (%a, %b) with the non-constant mask %ins.15.
+ %t2 = call <16 x i8 > @llvm.aarch64.neon.tbl2.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %ins.15 )
246
+ ; Result lanes 0-7 are %t1 lanes 0-7; shuffle indices 16-23 address the second
+ ; operand, i.e. %t2 lanes 0-7.
+ %s = shufflevector <16 x i8 > %t1 , <16 x i8 > %t2 , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 >
247
+ ret <16 x i8 > %s
248
+ }
249
+
131
250
define <16 x i8 > @shuffled_tbl2_to_tbl4_incompatible_shuffle (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d ) {
132
251
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_shuffle:
133
252
; CHECK: // %bb.0:
134
- ; CHECK-NEXT: adrp x8, .LCPI9_0
253
+ ; CHECK-NEXT: adrp x8, .LCPI12_0
135
254
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
136
255
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
137
256
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
138
257
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
139
- ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0 ]
140
- ; CHECK-NEXT: adrp x8, .LCPI9_1
258
+ ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI12_0 ]
259
+ ; CHECK-NEXT: adrp x8, .LCPI12_1
141
260
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
142
261
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v4
143
- ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1 ]
262
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_1 ]
144
263
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
145
264
; CHECK-NEXT: ret
146
265
%t1 = call <16 x i8 > @llvm.aarch64.neon.tbl2.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 16 , i8 20 , i8 24 , i8 28 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
@@ -152,18 +271,18 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_shuffle(<16 x i8> %a, <16 x
152
271
define <16 x i8 > @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d ) {
153
272
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1:
154
273
; CHECK: // %bb.0:
155
- ; CHECK-NEXT: adrp x8, .LCPI10_0
156
- ; CHECK-NEXT: adrp x9, .LCPI10_1
274
+ ; CHECK-NEXT: adrp x8, .LCPI13_0
275
+ ; CHECK-NEXT: adrp x9, .LCPI13_1
157
276
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
158
277
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
159
278
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
160
279
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
161
- ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0 ]
162
- ; CHECK-NEXT: adrp x8, .LCPI10_2
163
- ; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI10_1 ]
280
+ ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_0 ]
281
+ ; CHECK-NEXT: adrp x8, .LCPI13_2
282
+ ; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI13_1 ]
164
283
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
165
284
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v5
166
- ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_2 ]
285
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_2 ]
167
286
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
168
287
; CHECK-NEXT: ret
169
288
%t1 = call <16 x i8 > @llvm.aarch64.neon.tbl2.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 16 , i8 20 , i8 24 , i8 28 , i8 0 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
@@ -175,18 +294,18 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1(<16 x i8> %a, <1
175
294
define <16 x i8 > @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d ) {
176
295
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2:
177
296
; CHECK: // %bb.0:
178
- ; CHECK-NEXT: adrp x8, .LCPI11_0
179
- ; CHECK-NEXT: adrp x9, .LCPI11_1
297
+ ; CHECK-NEXT: adrp x8, .LCPI14_0
298
+ ; CHECK-NEXT: adrp x9, .LCPI14_1
180
299
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
181
300
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
182
301
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
183
302
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
184
- ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI11_0 ]
185
- ; CHECK-NEXT: adrp x8, .LCPI11_2
186
- ; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI11_1 ]
303
+ ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_0 ]
304
+ ; CHECK-NEXT: adrp x8, .LCPI14_2
305
+ ; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI14_1 ]
187
306
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
188
307
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v5
189
- ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_2 ]
308
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_2 ]
190
309
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
191
310
; CHECK-NEXT: ret
192
311
%t1 = call <16 x i8 > @llvm.aarch64.neon.tbl2.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 16 , i8 20 , i8 24 , i8 28 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
0 commit comments