Skip to content

Commit 109ede4

Browse files
authored
[AArch64] Extend v2i64 fptosi.sat to v2f64 (#91714)
This helps it produce a single instruction for the saturating conversion, as opposed to having to scalarize.
1 parent d182877 commit 109ede4

File tree

9 files changed

+221
-437
lines changed

9 files changed

+221
-437
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4286,6 +4286,15 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
42864286
return SDValue();
42874287

42884288
SDLoc DL(Op);
4289+
// Expand to f64 if we are saturating to i64, to help keep the lanes
4290+
// the same width and produce a single fcvtzs/fcvtzu.
4291+
if (SatWidth == 64 && SrcElementWidth < 64) {
4292+
MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
4293+
SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
4294+
SrcVT = F64VT;
4295+
SrcElementVT = MVT::f64;
4296+
SrcElementWidth = 64;
4297+
}
42894298
// Cases that we can emit directly.
42904299
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
42914300
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,

llvm/test/CodeGen/AArch64/fcvt_combine.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -345,11 +345,8 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
345345
; CHECK: // %bb.0:
346346
; CHECK-NEXT: fmov v1.2s, #16.00000000
347347
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
348-
; CHECK-NEXT: mov s1, v0.s[1]
349-
; CHECK-NEXT: fcvtzs x8, s0
350-
; CHECK-NEXT: fcvtzs x9, s1
351-
; CHECK-NEXT: fmov d0, x8
352-
; CHECK-NEXT: mov v0.d[1], x9
348+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
349+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
353350
; CHECK-NEXT: ret
354351
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
355352
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)

llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -436,12 +436,8 @@ entry:
436436
define <2 x i64> @stest_f32i64(<2 x float> %x) {
437437
; CHECK-LABEL: stest_f32i64:
438438
; CHECK: // %bb.0: // %entry
439-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
440-
; CHECK-NEXT: mov s1, v0.s[1]
441-
; CHECK-NEXT: fcvtzs x8, s0
442-
; CHECK-NEXT: fcvtzs x9, s1
443-
; CHECK-NEXT: fmov d0, x8
444-
; CHECK-NEXT: mov v0.d[1], x9
439+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
440+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
445441
; CHECK-NEXT: ret
446442
entry:
447443
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -1056,12 +1052,8 @@ entry:
10561052
define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
10571053
; CHECK-LABEL: stest_f32i64_mm:
10581054
; CHECK: // %bb.0: // %entry
1059-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1060-
; CHECK-NEXT: mov s1, v0.s[1]
1061-
; CHECK-NEXT: fcvtzs x8, s0
1062-
; CHECK-NEXT: fcvtzs x9, s1
1063-
; CHECK-NEXT: fmov d0, x8
1064-
; CHECK-NEXT: mov v0.d[1], x9
1055+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
1056+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
10651057
; CHECK-NEXT: ret
10661058
entry:
10671059
%conv = fptosi <2 x float> %x to <2 x i128>

llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -793,12 +793,8 @@ define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
793793
define <2 x i64> @test_signed_v2f32_v2i64(<2 x float> %f) {
794794
; CHECK-LABEL: test_signed_v2f32_v2i64:
795795
; CHECK: // %bb.0:
796-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
797-
; CHECK-NEXT: mov s1, v0.s[1]
798-
; CHECK-NEXT: fcvtzs x8, s0
799-
; CHECK-NEXT: fcvtzs x9, s1
800-
; CHECK-NEXT: fmov d0, x8
801-
; CHECK-NEXT: mov v0.d[1], x9
796+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
797+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
802798
; CHECK-NEXT: ret
803799
%x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f)
804800
ret <2 x i64> %x
@@ -1060,17 +1056,10 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
10601056
define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
10611057
; CHECK-LABEL: test_signed_v4f32_v4i64:
10621058
; CHECK: // %bb.0:
1063-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1064-
; CHECK-NEXT: mov s3, v0.s[1]
1065-
; CHECK-NEXT: fcvtzs x9, s0
1066-
; CHECK-NEXT: mov s2, v1.s[1]
1067-
; CHECK-NEXT: fcvtzs x8, s1
1068-
; CHECK-NEXT: fcvtzs x11, s3
1069-
; CHECK-NEXT: fmov d0, x9
1070-
; CHECK-NEXT: fcvtzs x10, s2
1071-
; CHECK-NEXT: fmov d1, x8
1072-
; CHECK-NEXT: mov v0.d[1], x11
1073-
; CHECK-NEXT: mov v1.d[1], x10
1059+
; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
1060+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
1061+
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
1062+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
10741063
; CHECK-NEXT: ret
10751064
%x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
10761065
ret <4 x i64> %x

llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -707,12 +707,8 @@ define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
707707
define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
708708
; CHECK-LABEL: test_unsigned_v2f32_v2i64:
709709
; CHECK: // %bb.0:
710-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
711-
; CHECK-NEXT: mov s1, v0.s[1]
712-
; CHECK-NEXT: fcvtzu x8, s0
713-
; CHECK-NEXT: fcvtzu x9, s1
714-
; CHECK-NEXT: fmov d0, x8
715-
; CHECK-NEXT: mov v0.d[1], x9
710+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
711+
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
716712
; CHECK-NEXT: ret
717713
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
718714
ret <2 x i64> %x
@@ -927,17 +923,10 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
927923
define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
928924
; CHECK-LABEL: test_unsigned_v4f32_v4i64:
929925
; CHECK: // %bb.0:
930-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
931-
; CHECK-NEXT: mov s3, v0.s[1]
932-
; CHECK-NEXT: fcvtzu x9, s0
933-
; CHECK-NEXT: mov s2, v1.s[1]
934-
; CHECK-NEXT: fcvtzu x8, s1
935-
; CHECK-NEXT: fcvtzu x11, s3
936-
; CHECK-NEXT: fmov d0, x9
937-
; CHECK-NEXT: fcvtzu x10, s2
938-
; CHECK-NEXT: fmov d1, x8
939-
; CHECK-NEXT: mov v0.d[1], x11
940-
; CHECK-NEXT: mov v1.d[1], x10
926+
; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
927+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
928+
; CHECK-NEXT: fcvtzu v1.2d, v1.2d
929+
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
941930
; CHECK-NEXT: ret
942931
%x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
943932
ret <4 x i64> %x

llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -295,11 +295,8 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
295295
; CHECK-LABEL: llrint_v2i64_v2f32:
296296
; CHECK: // %bb.0:
297297
; CHECK-NEXT: frintx v0.2s, v0.2s
298-
; CHECK-NEXT: mov s1, v0.s[1]
299-
; CHECK-NEXT: fcvtzs x8, s0
300-
; CHECK-NEXT: fcvtzs x9, s1
301-
; CHECK-NEXT: fmov d0, x8
302-
; CHECK-NEXT: mov v0.d[1], x9
298+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
299+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
303300
; CHECK-NEXT: ret
304301
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
305302
ret <2 x i64> %a

llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -534,11 +534,8 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
534534
; CHECK-i64-LABEL: lrint_v2f32:
535535
; CHECK-i64: // %bb.0:
536536
; CHECK-i64-NEXT: frintx v0.2s, v0.2s
537-
; CHECK-i64-NEXT: mov s1, v0.s[1]
538-
; CHECK-i64-NEXT: fcvtzs x8, s0
539-
; CHECK-i64-NEXT: fcvtzs x9, s1
540-
; CHECK-i64-NEXT: fmov d0, x8
541-
; CHECK-i64-NEXT: mov v0.d[1], x9
537+
; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
538+
; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
542539
; CHECK-i64-NEXT: ret
543540
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
544541
ret <2 x iXLen> %a

0 commit comments

Comments
 (0)