@@ -291,3 +291,160 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
291
291
%abd = call <2 x double > @llvm.aarch64.neon.fabd.v2f64 (<2 x double > %lhs , <2 x double > %rhs )
292
292
ret <2 x double > %abd
293
293
}
294
+
295
; Unsigned absolute difference of two <8 x i16> operands that are each masked
; to their low 4 bits (and with 15). The lanes are then reversed by the
; shufflevector and masked with 15 again. The absolute difference of two
; values in [0, 15] is itself at most 15, so the trailing mask is redundant:
; the expected output below contains only the two operand masks and no third
; `and` after the uabd.
+ define <8 x i16 > @test_uabd_knownbits_vec8i16 (<8 x i16 > %lhs , <8 x i16 > %rhs ) {
296
+ ; CHECK-LABEL: test_uabd_knownbits_vec8i16:
297
+ ; CHECK: // %bb.0:
298
+ ; CHECK-NEXT: movi v2.8h, #15
299
+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
300
+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
301
+ ; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
302
+ ; CHECK-NEXT: rev64 v0.8h, v0.8h
303
+ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
304
+ ; CHECK-NEXT: ret
305
+ %and1 = and <8 x i16 > %lhs , <i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 >
306
+ %and2 = and <8 x i16 > %rhs , <i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 >
307
+ %uabd = call <8 x i16 > @llvm.aarch64.neon.uabd.v8i16 (<8 x i16 > %and1 , <8 x i16 > %and2 )
308
+ %suff = shufflevector <8 x i16 > %uabd , <8 x i16 > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
309
+ %res = and <8 x i16 > %suff , <i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 >
310
+ ret <8 x i16 > %res
311
+ }
312
+
313
; Both <4 x i32> operands are masked to their low 16 bits (and with 65535),
; so the uabd result also fits in 16 bits. The lanes are reversed by the
; shufflevector and then shifted right by 17, which leaves every lane zero.
; NOTE(review): the expected output materializes zero with movi but still
; emits `ushr #17` on that zero register; presumably this records a fold
; that is not yet fully applied -- confirm against the accompanying change.
+ define <4 x i32 > @knownbits_uabd_mask_and_shuffle_lshr (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
314
+ ; CHECK-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
315
+ ; CHECK: // %bb.0:
316
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
317
+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #17
318
+ ; CHECK-NEXT: ret
319
+ %1 = and <4 x i32 > %a0 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
320
+ %2 = and <4 x i32 > %a1 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
321
+ %3 = call <4 x i32 > @llvm.aarch64.neon.uabd.v4i32 (<4 x i32 > %1 , <4 x i32 > %2 )
322
+ %4 = shufflevector <4 x i32 > %3 , <4 x i32 > undef , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
323
+ %5 = lshr <4 x i32 > %4 , <i32 17 , i32 17 , i32 17 , i32 17 >
324
+ ret <4 x i32 > %5
325
+ }
326
+
327
; Both <4 x i32> operands are masked to their low 15 bits (and with 32767)
; before the uabd intrinsic. The shufflevector mask <0, 1, 2, 3> keeps the
; lanes in their original order, and the logical shift right by 17 clears
; every bit that can be set. The expected output is a single zeroing movi
; followed by ret.
+ define <4 x i32 > @knownbits_mask_and_shuffle_lshr (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
328
+ ; CHECK-LABEL: knownbits_mask_and_shuffle_lshr:
329
+ ; CHECK: // %bb.0:
330
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
331
+ ; CHECK-NEXT: ret
332
+ %1 = and <4 x i32 > %a0 , <i32 32767 , i32 32767 , i32 32767 , i32 32767 >
333
+ %2 = and <4 x i32 > %a1 , <i32 32767 , i32 32767 , i32 32767 , i32 32767 >
334
+ %3 = call <4 x i32 > @llvm.aarch64.neon.uabd.v4i32 (<4 x i32 > %1 , <4 x i32 > %2 )
335
+ %4 = shufflevector <4 x i32 > %3 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
336
+ %5 = lshr <4 x i32 > %4 , <i32 17 , i32 17 , i32 17 , i32 17 >
337
+ ret <4 x i32 > %5
338
+ }
339
+
340
; Signed absolute difference with per-lane masks that differ between the two
; operands: %lhs uses <255, -1, -1, 255> and %rhs uses <255, 255, -1, -1>.
; The shufflevector selects lanes <0, 0, 3, 3> of the result, which is then
; masked with 255. In lane 0 both operands are limited to 8 bits, but in
; lane 3 only %lhs is (the %rhs mask there is -1), so that lane is not
; bounded to 8 bits. The expected output keeps the final `and` (mask built
; by the second movi) after the mov/trn2 pair.
+ define <4 x i32 > @test_sabd_knownbits_vec4i32 (<4 x i32 > %lhs , <4 x i32 > %rhs ) {
341
+ ; CHECK-LABEL: test_sabd_knownbits_vec4i32:
342
+ ; CHECK: // %bb.0:
343
+ ; CHECK-NEXT: adrp x8, .LCPI31_0
344
+ ; CHECK-NEXT: adrp x9, .LCPI31_1
345
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
346
+ ; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
347
+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
348
+ ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
349
+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
350
+ ; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff
351
+ ; CHECK-NEXT: mov v0.s[1], v0.s[0]
352
+ ; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
353
+ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
354
+ ; CHECK-NEXT: ret
355
+ %and1 = and <4 x i32 > %lhs , <i32 255 , i32 -1 , i32 -1 , i32 255 >
356
+ %and2 = and <4 x i32 > %rhs , <i32 255 , i32 255 , i32 -1 , i32 -1 >
357
+ %abd = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %and1 , <4 x i32 > %and2 )
358
+ %s = shufflevector <4 x i32 > %abd , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
359
+ %4 = and <4 x i32 > %s , <i32 255 , i32 255 , i32 255 , i32 255 >
360
+ ret <4 x i32 > %4
361
+ }
362
+
363
; Both operands are masked with the same constant <-1, -1, 255, 4085> and
; fed to the sabd intrinsic; the shufflevector then selects lanes
; <2, 2, 3, 3>, i.e. only the lanes whose inputs were narrowed by the mask.
; The expected output keeps the whole sequence: one constant-pool load
; shared by both `and`s, the sabd, and a zip2 in place of the shuffle.
+ define <4 x i32 > @knownbits_sabd_and_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
364
+ ; CHECK-LABEL: knownbits_sabd_and_mask:
365
+ ; CHECK: // %bb.0:
366
+ ; CHECK-NEXT: adrp x8, .LCPI32_0
367
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
368
+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
369
+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
370
+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
371
+ ; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
372
+ ; CHECK-NEXT: ret
373
+ %1 = and <4 x i32 > %a0 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
374
+ %2 = and <4 x i32 > %a1 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
375
+ %3 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %1 , <4 x i32 > %2 )
376
+ %4 = shufflevector <4 x i32 > %3 , <4 x i32 > undef , <4 x i32 > <i32 2 , i32 2 , i32 3 , i32 3 >
377
+ ret <4 x i32 > %4
378
+ }
379
+
380
; Each operand is masked with <-1, -1, 255, 4085> and then OR-ed with 65535.
; In lanes 2 and 3 the mask clears bits 16 and above while the `or` sets
; bits 0-15, so both operands equal 65535 there and their absolute
; difference is zero. The shufflevector selects exactly those lanes
; (<2, 2, 3, 3>), and the expected output is a single zeroing movi.
; NOTE(review): the function name says "sabd" but the body calls the
; @llvm.aarch64.neon.uabd.v4i32 intrinsic -- confirm which one is intended.
+ define <4 x i32 > @knownbits_sabd_and_or_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
381
+ ; CHECK-LABEL: knownbits_sabd_and_or_mask:
382
+ ; CHECK: // %bb.0:
383
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
384
+ ; CHECK-NEXT: ret
385
+ %1 = and <4 x i32 > %a0 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
386
+ %2 = or <4 x i32 > %1 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
387
+ %3 = and <4 x i32 > %a1 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
388
+ %4 = or <4 x i32 > %3 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
389
+ %5 = call <4 x i32 > @llvm.aarch64.neon.uabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
390
+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 2 , i32 2 , i32 3 , i32 3 >
391
+ ret <4 x i32 > %6
392
+ }
393
+
394
; Each operand is masked with <-1, -1, 255, 4085> and then XOR-ed with 65535
; before the sabd intrinsic; the shufflevector selects lanes <2, 2, 3, 3>.
; Unlike the and/or variant, the xor leaves the low bits dependent on the
; inputs, and the expected output keeps every step: both `and`s, both
; `eor`s, the sabd and a zip2.
+ define <4 x i32 > @knownbits_sabd_and_xor_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
395
+ ; CHECK-LABEL: knownbits_sabd_and_xor_mask:
396
+ ; CHECK: // %bb.0:
397
+ ; CHECK-NEXT: adrp x8, .LCPI34_0
398
+ ; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff
399
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
400
+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
401
+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
402
+ ; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
403
+ ; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b
404
+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
405
+ ; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
406
+ ; CHECK-NEXT: ret
407
+ %1 = and <4 x i32 > %a0 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
408
+ %2 = xor <4 x i32 > %1 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
409
+ %3 = and <4 x i32 > %a1 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
410
+ %4 = xor <4 x i32 > %3 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
411
+ %5 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
412
+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 2 , i32 2 , i32 3 , i32 3 >
413
+ ret <4 x i32 > %6
414
+ }
415
+
416
; Each operand is masked with <-65536, -7, -7, -65536> and shifted left by
; 17 before the sabd intrinsic. In lanes 0 and 3 the mask keeps only bits
; 16-31, and the shift by 17 moves all of those out of the 32-bit lane, so
; both operands are zero there. The shufflevector selects exactly those
; lanes (<0, 0, 3, 3>), and the expected output is a single zeroing movi.
+ define <4 x i32 > @knownbits_sabd_and_shl_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
417
+ ; CHECK-LABEL: knownbits_sabd_and_shl_mask:
418
+ ; CHECK: // %bb.0:
419
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
420
+ ; CHECK-NEXT: ret
421
+ %1 = and <4 x i32 > %a0 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
422
+ %2 = shl <4 x i32 > %1 , <i32 17 , i32 17 , i32 17 , i32 17 >
423
+ %3 = and <4 x i32 > %a1 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
424
+ %4 = shl <4 x i32 > %3 , <i32 17 , i32 17 , i32 17 , i32 17 >
425
+ %5 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
426
+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
427
+ ret <4 x i32 > %6
428
+ }
429
+
430
; Each operand is multiplied by a masked copy of itself (mask
; <-65536, -7, -7, -65536>) before the sabd intrinsic; the shufflevector
; selects lanes <0, 0, 3, 3>. The expected output keeps the full sequence:
; both `and`s, both `mul`s, the sabd, and a mov/trn2 pair in place of the
; shuffle -- nothing is folded to a constant here.
+ define <4 x i32 > @knownbits_sabd_and_mul_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
431
+ ; CHECK-LABEL: knownbits_sabd_and_mul_mask:
432
+ ; CHECK: // %bb.0:
433
+ ; CHECK-NEXT: adrp x8, .LCPI36_0
434
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
435
+ ; CHECK-NEXT: and v3.16b, v0.16b, v2.16b
436
+ ; CHECK-NEXT: and v2.16b, v1.16b, v2.16b
437
+ ; CHECK-NEXT: mul v0.4s, v0.4s, v3.4s
438
+ ; CHECK-NEXT: mul v1.4s, v1.4s, v2.4s
439
+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
440
+ ; CHECK-NEXT: mov v0.s[1], v0.s[0]
441
+ ; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
442
+ ; CHECK-NEXT: ret
443
+ %1 = and <4 x i32 > %a0 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
444
+ %2 = mul <4 x i32 > %a0 , %1
445
+ %3 = and <4 x i32 > %a1 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
446
+ %4 = mul <4 x i32 > %a1 , %3
447
+ %5 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
448
+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
449
+ ret <4 x i32 > %6
450
+ }
0 commit comments