Skip to content

Commit cbebace

Browse files
committed
[AArch64] Add UQXTN2 patterns
Similar to the existing UQXTN and SQXTN2 patterns, we can generate a UQXTN2 from concat(Vd, trunc(umin(X, 255))).
1 parent 14ec474 commit cbebace

File tree

4 files changed

+49
-57
lines changed

4 files changed

+49
-57
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5363,6 +5363,17 @@ def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
53635363
(v4i32 VImm8000)))),
53645364
(SQXTNv4i16 V128:$Vn)>;
53655365

5366+
// concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTN2(Vd, Vn)
5367+
def : Pat<(v16i8 (concat_vectors
5368+
(v8i8 V64:$Vd),
5369+
(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))),
5370+
(UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5371+
// concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTN2(Vd, Vn)
5372+
def : Pat<(v8i16 (concat_vectors
5373+
(v4i16 V64:$Vd),
5374+
(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))),
5375+
(UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5376+
53665377
// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
53675378
// with reversed min/max
53685379
def : Pat<(v16i8 (concat_vectors

llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll

Lines changed: 24 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -283,14 +283,12 @@ entry:
283283
define <8 x i16> @utesth_f16i16(<8 x half> %x) {
284284
; CHECK-CVT-LABEL: utesth_f16i16:
285285
; CHECK-CVT: // %bb.0: // %entry
286-
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
287-
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
288-
; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
289-
; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
290-
; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
291-
; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
292-
; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
293-
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
286+
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
287+
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
288+
; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
289+
; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
290+
; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
291+
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
294292
; CHECK-CVT-NEXT: ret
295293
;
296294
; CHECK-FP16-LABEL: utesth_f16i16:
@@ -308,14 +306,12 @@ entry:
308306
define <8 x i16> @ustest_f16i16(<8 x half> %x) {
309307
; CHECK-CVT-LABEL: ustest_f16i16:
310308
; CHECK-CVT: // %bb.0: // %entry
311-
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
312-
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
313-
; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
314-
; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
315-
; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
316-
; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
317-
; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
318-
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
309+
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
310+
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
311+
; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
312+
; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
313+
; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
314+
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
319315
; CHECK-CVT-NEXT: ret
320316
;
321317
; CHECK-FP16-LABEL: ustest_f16i16:
@@ -909,14 +905,12 @@ entry:
909905
define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
910906
; CHECK-CVT-LABEL: utesth_f16i16_mm:
911907
; CHECK-CVT: // %bb.0: // %entry
912-
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
913-
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
914-
; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
915-
; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
916-
; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
917-
; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
918-
; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
919-
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
908+
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
909+
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
910+
; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
911+
; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
912+
; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
913+
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
920914
; CHECK-CVT-NEXT: ret
921915
;
922916
; CHECK-FP16-LABEL: utesth_f16i16_mm:
@@ -933,14 +927,12 @@ entry:
933927
define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
934928
; CHECK-CVT-LABEL: ustest_f16i16_mm:
935929
; CHECK-CVT: // %bb.0: // %entry
936-
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
937-
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
938-
; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
939-
; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
940-
; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
941-
; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
942-
; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
943-
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
930+
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
931+
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
932+
; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
933+
; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
934+
; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
935+
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
944936
; CHECK-CVT-NEXT: ret
945937
;
946938
; CHECK-FP16-LABEL: ustest_f16i16_mm:

llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2509,12 +2509,10 @@ define <16 x i8> @test_unsigned_v16f32_v16i8(<16 x float> %f) {
25092509
define <8 x i16> @test_unsigned_v8f32_v8i16(<8 x float> %f) {
25102510
; CHECK-LABEL: test_unsigned_v8f32_v8i16:
25112511
; CHECK: // %bb.0:
2512-
; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff
2513-
; CHECK-NEXT: fcvtzu v1.4s, v1.4s
25142512
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
2515-
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
2516-
; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
2517-
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
2513+
; CHECK-NEXT: fcvtzu v1.4s, v1.4s
2514+
; CHECK-NEXT: uqxtn v0.4h, v0.4s
2515+
; CHECK-NEXT: uqxtn2 v0.8h, v1.4s
25182516
; CHECK-NEXT: ret
25192517
%x = call <8 x i16> @llvm.fptoui.sat.v8f32.v8i16(<8 x float> %f)
25202518
ret <8 x i16> %x
@@ -2523,17 +2521,14 @@ define <8 x i16> @test_unsigned_v8f32_v8i16(<8 x float> %f) {
25232521
define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) {
25242522
; CHECK-LABEL: test_unsigned_v16f32_v16i16:
25252523
; CHECK: // %bb.0:
2526-
; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff
2527-
; CHECK-NEXT: fcvtzu v1.4s, v1.4s
25282524
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
2529-
; CHECK-NEXT: fcvtzu v3.4s, v3.4s
25302525
; CHECK-NEXT: fcvtzu v2.4s, v2.4s
2531-
; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s
2532-
; CHECK-NEXT: umin v0.4s, v0.4s, v4.4s
2533-
; CHECK-NEXT: umin v3.4s, v3.4s, v4.4s
2534-
; CHECK-NEXT: umin v2.4s, v2.4s, v4.4s
2535-
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
2536-
; CHECK-NEXT: uzp1 v1.8h, v2.8h, v3.8h
2526+
; CHECK-NEXT: fcvtzu v4.4s, v1.4s
2527+
; CHECK-NEXT: uqxtn v0.4h, v0.4s
2528+
; CHECK-NEXT: uqxtn v1.4h, v2.4s
2529+
; CHECK-NEXT: fcvtzu v2.4s, v3.4s
2530+
; CHECK-NEXT: uqxtn2 v0.8h, v4.4s
2531+
; CHECK-NEXT: uqxtn2 v1.8h, v2.4s
25372532
; CHECK-NEXT: ret
25382533
%x = call <16 x i16> @llvm.fptoui.sat.v16f32.v16i16(<16 x float> %f)
25392534
ret <16 x i16> %x
@@ -2632,12 +2627,10 @@ define <16 x i8> @test_unsigned_v16f16_v16i8(<16 x half> %f) {
26322627
;
26332628
; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i8:
26342629
; CHECK-FP16: // %bb.0:
2635-
; CHECK-FP16-NEXT: movi v2.2d, #0xff00ff00ff00ff
2636-
; CHECK-FP16-NEXT: fcvtzu v1.8h, v1.8h
26372630
; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
2638-
; CHECK-FP16-NEXT: umin v1.8h, v1.8h, v2.8h
2639-
; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v2.8h
2640-
; CHECK-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b
2631+
; CHECK-FP16-NEXT: fcvtzu v1.8h, v1.8h
2632+
; CHECK-FP16-NEXT: uqxtn v0.8b, v0.8h
2633+
; CHECK-FP16-NEXT: uqxtn2 v0.16b, v1.8h
26412634
; CHECK-FP16-NEXT: ret
26422635
%x = call <16 x i8> @llvm.fptoui.sat.v16f16.v16i8(<16 x half> %f)
26432636
ret <16 x i8> %x

llvm/test/CodeGen/AArch64/qmovn.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,8 @@ entry:
250250
define <16 x i8> @unsigned_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
251251
; CHECK-LABEL: unsigned_v8i16_to_v16i8:
252252
; CHECK: // %bb.0: // %entry
253-
; CHECK-NEXT: movi v2.2d, #0xff00ff00ff00ff
254253
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
255-
; CHECK-NEXT: umin v1.8h, v1.8h, v2.8h
256-
; CHECK-NEXT: xtn2 v0.16b, v1.8h
254+
; CHECK-NEXT: uqxtn2 v0.16b, v1.8h
257255
; CHECK-NEXT: ret
258256
entry:
259257
%min = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %y, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>)
@@ -265,10 +263,8 @@ entry:
265263
define <8 x i16> @unsigned_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
266264
; CHECK-LABEL: unsigned_v4i32_to_v8i16:
267265
; CHECK: // %bb.0: // %entry
268-
; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff
269266
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
270-
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
271-
; CHECK-NEXT: xtn2 v0.8h, v1.4s
267+
; CHECK-NEXT: uqxtn2 v0.8h, v1.4s
272268
; CHECK-NEXT: ret
273269
entry:
274270
%min = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %y, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)

0 commit comments

Comments
 (0)