@@ -554,6 +554,48 @@ define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
554
554
ret <16 x i8 > %sub
555
555
}
556
556
557
+ ; TODO: (abds x, y) upper bits are known zero if x and y have extra sign bits
558
+ define <4 x i16 > @combine_sabd_4h_zerosign (<4 x i16 > %a , <4 x i16 > %b ) #0 {
559
+ ; CHECK-LABEL: combine_sabd_4h_zerosign:
560
+ ; CHECK: // %bb.0:
561
+ ; CHECK-NEXT: adrp x8, .LCPI41_0
562
+ ; CHECK-NEXT: adrp x9, .LCPI41_1
563
+ ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI41_0]
564
+ ; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI41_1]
565
+ ; CHECK-NEXT: sshl v0.4h, v0.4h, v2.4h
566
+ ; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h
567
+ ; CHECK-NEXT: movi v2.4h, #128, lsl #8
568
+ ; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h
569
+ ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
570
+ ; CHECK-NEXT: ret
571
+ %a.ext = ashr <4 x i16 > %a , <i16 7 , i16 8 , i16 9 , i16 10 > ; every lane is shifted by at least 7, so each %a.ext lane fits in 9 signed bits
572
+ %b.ext = ashr <4 x i16 > %b , <i16 11 , i16 12 , i16 13 , i16 14 > ; every lane is shifted by at least 11, so each %b.ext lane fits in 5 signed bits
573
+ %max = tail call <4 x i16 > @llvm.smax.v4i16 (<4 x i16 > %a.ext , <4 x i16 > %b.ext )
574
+ %min = tail call <4 x i16 > @llvm.smin.v4i16 (<4 x i16 > %a.ext , <4 x i16 > %b.ext )
575
+ %sub = sub <4 x i16 > %max , %min ; smax - smin is the signed absolute difference; the CHECK lines show it selected as sabd
576
+ %mask = and <4 x i16 > %sub , <i16 32768 , i16 32768 , i16 32768 , i16 32768 > ; 0x8000: keeps only bit 15 of each lane; currently still emitted as a separate 'and' (see CHECK lines)
577
+ ret <4 x i16 > %mask
578
+ }
579
+
580
+ ; negative test - mask extends beyond known zero bits
581
+ define <2 x i32 > @combine_sabd_2s_zerosign_negative (<2 x i32 > %a , <2 x i32 > %b ) {
582
+ ; CHECK-LABEL: combine_sabd_2s_zerosign_negative:
583
+ ; CHECK: // %bb.0:
584
+ ; CHECK-NEXT: sshr v0.2s, v0.2s, #3
585
+ ; CHECK-NEXT: sshr v1.2s, v1.2s, #15
586
+ ; CHECK-NEXT: mvni v2.2s, #7, msl #16
587
+ ; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
588
+ ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
589
+ ; CHECK-NEXT: ret
590
+ %a.ext = ashr <2 x i32 > %a , <i32 3 , i32 3 > ; shift by 3: each %a.ext lane fits in 29 signed bits
591
+ %b.ext = ashr <2 x i32 > %b , <i32 15 , i32 15 > ; shift by 15: each %b.ext lane fits in 17 signed bits
592
+ %max = tail call <2 x i32 > @llvm.smax.v2i32 (<2 x i32 > %a.ext , <2 x i32 > %b.ext )
593
+ %min = tail call <2 x i32 > @llvm.smin.v2i32 (<2 x i32 > %a.ext , <2 x i32 > %b.ext )
594
+ %sub = sub <2 x i32 > %max , %min ; signed absolute difference; magnitude is below 2^29, so only bits 29-31 are provably zero
595
+ %mask = and <2 x i32 > %sub , <i32 -524288 , i32 -524288 > ; 0xFFF80000: covers bits 19-31, wider than the provably-zero bits 29-31, so the 'and' must be kept
596
+ ret <2 x i32 > %mask
597
+ }
598
+
557
599
declare <8 x i8 > @llvm.abs.v8i8 (<8 x i8 >, i1 )
558
600
declare <16 x i8 > @llvm.abs.v16i8 (<16 x i8 >, i1 )
559
601
0 commit comments