@@ -75,21 +75,24 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
75
75
define void @select_v32i8 (ptr %a , ptr %b ) #0 {
76
76
; CHECK-LABEL: select_v32i8:
77
77
; CHECK: // %bb.0:
78
- ; CHECK-NEXT: ldp q0, q1 , [x1]
78
+ ; CHECK-NEXT: ldp q1, q0 , [x1]
79
79
; CHECK-NEXT: adrp x8, .LCPI3_0
80
+ ; CHECK-NEXT: ptrue p0.b, vl16
80
81
; CHECK-NEXT: ldp q3, q2, [x0]
81
- ; CHECK-NEXT: cmeq v6.16b, v3.16b, v0.16b
82
82
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI3_0]
83
- ; CHECK-NEXT: and z3.d, z3.d, z6.d
84
- ; CHECK-NEXT: cmeq v5.16b, v2.16b, v1.16b
83
+ ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z0.b
84
+ ; CHECK-NEXT: cmpeq p0.b, p0/z, z3.b, z1.b
85
+ ; CHECK-NEXT: mov z5.b, p1/z, #-1 // =0xffffffffffffffff
86
+ ; CHECK-NEXT: mov z6.b, p0/z, #-1 // =0xffffffffffffffff
85
87
; CHECK-NEXT: and z2.d, z2.d, z5.d
86
88
; CHECK-NEXT: eor z5.d, z5.d, z4.d
87
89
; CHECK-NEXT: eor z4.d, z6.d, z4.d
88
- ; CHECK-NEXT: and z1.d, z1.d, z5.d
89
- ; CHECK-NEXT: and z0.d, z0.d, z4.d
90
- ; CHECK-NEXT: orr z1.d, z2.d, z1.d
91
- ; CHECK-NEXT: orr z0.d, z3.d, z0.d
92
- ; CHECK-NEXT: stp q0, q1, [x0]
90
+ ; CHECK-NEXT: and z3.d, z3.d, z6.d
91
+ ; CHECK-NEXT: and z1.d, z1.d, z4.d
92
+ ; CHECK-NEXT: and z0.d, z0.d, z5.d
93
+ ; CHECK-NEXT: orr z1.d, z3.d, z1.d
94
+ ; CHECK-NEXT: orr z0.d, z2.d, z0.d
95
+ ; CHECK-NEXT: stp q1, q0, [x0]
93
96
; CHECK-NEXT: ret
94
97
%op1 = load <32 x i8 >, ptr %a
95
98
%op2 = load <32 x i8 >, ptr %b
@@ -172,21 +175,24 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #
172
175
define void @select_v16i16 (ptr %a , ptr %b ) #0 {
173
176
; CHECK-LABEL: select_v16i16:
174
177
; CHECK: // %bb.0:
175
- ; CHECK-NEXT: ldp q0, q1 , [x1]
178
+ ; CHECK-NEXT: ldp q1, q0 , [x1]
176
179
; CHECK-NEXT: adrp x8, .LCPI7_0
180
+ ; CHECK-NEXT: ptrue p0.h, vl8
177
181
; CHECK-NEXT: ldp q3, q2, [x0]
178
- ; CHECK-NEXT: cmeq v6.8h, v3.8h, v0.8h
179
182
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI7_0]
180
- ; CHECK-NEXT: and z3.d, z3.d, z6.d
181
- ; CHECK-NEXT: cmeq v5.8h, v2.8h, v1.8h
183
+ ; CHECK-NEXT: cmpeq p1.h, p0/z, z2.h, z0.h
184
+ ; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z1.h
185
+ ; CHECK-NEXT: mov z5.h, p1/z, #-1 // =0xffffffffffffffff
186
+ ; CHECK-NEXT: mov z6.h, p0/z, #-1 // =0xffffffffffffffff
182
187
; CHECK-NEXT: and z2.d, z2.d, z5.d
183
188
; CHECK-NEXT: eor z5.d, z5.d, z4.d
184
189
; CHECK-NEXT: eor z4.d, z6.d, z4.d
185
- ; CHECK-NEXT: and z1.d, z1.d, z5.d
186
- ; CHECK-NEXT: and z0.d, z0.d, z4.d
187
- ; CHECK-NEXT: orr z1.d, z2.d, z1.d
188
- ; CHECK-NEXT: orr z0.d, z3.d, z0.d
189
- ; CHECK-NEXT: stp q0, q1, [x0]
190
+ ; CHECK-NEXT: and z3.d, z3.d, z6.d
191
+ ; CHECK-NEXT: and z1.d, z1.d, z4.d
192
+ ; CHECK-NEXT: and z0.d, z0.d, z5.d
193
+ ; CHECK-NEXT: orr z1.d, z3.d, z1.d
194
+ ; CHECK-NEXT: orr z0.d, z2.d, z0.d
195
+ ; CHECK-NEXT: stp q1, q0, [x0]
190
196
; CHECK-NEXT: ret
191
197
%op1 = load <16 x i16 >, ptr %a
192
198
%op2 = load <16 x i16 >, ptr %b
@@ -246,21 +252,24 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #
246
252
define void @select_v8i32 (ptr %a , ptr %b ) #0 {
247
253
; CHECK-LABEL: select_v8i32:
248
254
; CHECK: // %bb.0:
249
- ; CHECK-NEXT: ldp q0, q1 , [x1]
255
+ ; CHECK-NEXT: ldp q1, q0 , [x1]
250
256
; CHECK-NEXT: adrp x8, .LCPI10_0
257
+ ; CHECK-NEXT: ptrue p0.s, vl4
251
258
; CHECK-NEXT: ldp q3, q2, [x0]
252
- ; CHECK-NEXT: cmeq v6.4s, v3.4s, v0.4s
253
259
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0]
254
- ; CHECK-NEXT: and z3.d, z3.d, z6.d
255
- ; CHECK-NEXT: cmeq v5.4s, v2.4s, v1.4s
260
+ ; CHECK-NEXT: cmpeq p1.s, p0/z, z2.s, z0.s
261
+ ; CHECK-NEXT: cmpeq p0.s, p0/z, z3.s, z1.s
262
+ ; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff
263
+ ; CHECK-NEXT: mov z6.s, p0/z, #-1 // =0xffffffffffffffff
256
264
; CHECK-NEXT: and z2.d, z2.d, z5.d
257
265
; CHECK-NEXT: eor z5.d, z5.d, z4.d
258
266
; CHECK-NEXT: eor z4.d, z6.d, z4.d
259
- ; CHECK-NEXT: and z1.d, z1.d, z5.d
260
- ; CHECK-NEXT: and z0.d, z0.d, z4.d
261
- ; CHECK-NEXT: orr z1.d, z2.d, z1.d
262
- ; CHECK-NEXT: orr z0.d, z3.d, z0.d
263
- ; CHECK-NEXT: stp q0, q1, [x0]
267
+ ; CHECK-NEXT: and z3.d, z3.d, z6.d
268
+ ; CHECK-NEXT: and z1.d, z1.d, z4.d
269
+ ; CHECK-NEXT: and z0.d, z0.d, z5.d
270
+ ; CHECK-NEXT: orr z1.d, z3.d, z1.d
271
+ ; CHECK-NEXT: orr z0.d, z2.d, z0.d
272
+ ; CHECK-NEXT: stp q1, q0, [x0]
264
273
; CHECK-NEXT: ret
265
274
%op1 = load <8 x i32 >, ptr %a
266
275
%op2 = load <8 x i32 >, ptr %b
@@ -317,21 +326,24 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #
317
326
define void @select_v4i64 (ptr %a , ptr %b ) #0 {
318
327
; CHECK-LABEL: select_v4i64:
319
328
; CHECK: // %bb.0:
320
- ; CHECK-NEXT: ldp q0, q1 , [x1]
329
+ ; CHECK-NEXT: ldp q1, q0 , [x1]
321
330
; CHECK-NEXT: adrp x8, .LCPI13_0
331
+ ; CHECK-NEXT: ptrue p0.d, vl2
322
332
; CHECK-NEXT: ldp q3, q2, [x0]
323
- ; CHECK-NEXT: cmeq v6.2d, v3.2d, v0.2d
324
333
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_0]
325
- ; CHECK-NEXT: and z3.d, z3.d, z6.d
326
- ; CHECK-NEXT: cmeq v5.2d, v2.2d, v1.2d
334
+ ; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z0.d
335
+ ; CHECK-NEXT: cmpeq p0.d, p0/z, z3.d, z1.d
336
+ ; CHECK-NEXT: mov z5.d, p1/z, #-1 // =0xffffffffffffffff
337
+ ; CHECK-NEXT: mov z6.d, p0/z, #-1 // =0xffffffffffffffff
327
338
; CHECK-NEXT: and z2.d, z2.d, z5.d
328
339
; CHECK-NEXT: eor z5.d, z5.d, z4.d
329
340
; CHECK-NEXT: eor z4.d, z6.d, z4.d
330
- ; CHECK-NEXT: and z1.d, z1.d, z5.d
331
- ; CHECK-NEXT: and z0.d, z0.d, z4.d
332
- ; CHECK-NEXT: orr z1.d, z2.d, z1.d
333
- ; CHECK-NEXT: orr z0.d, z3.d, z0.d
334
- ; CHECK-NEXT: stp q0, q1, [x0]
341
+ ; CHECK-NEXT: and z3.d, z3.d, z6.d
342
+ ; CHECK-NEXT: and z1.d, z1.d, z4.d
343
+ ; CHECK-NEXT: and z0.d, z0.d, z5.d
344
+ ; CHECK-NEXT: orr z1.d, z3.d, z1.d
345
+ ; CHECK-NEXT: orr z0.d, z2.d, z0.d
346
+ ; CHECK-NEXT: stp q1, q0, [x0]
335
347
; CHECK-NEXT: ret
336
348
%op1 = load <4 x i64 >, ptr %a
337
349
%op2 = load <4 x i64 >, ptr %b
0 commit comments