Skip to content

Commit 78f286c

Browse files
committed
[AArch64] Extend v2i64 fptosi.sat to v2f64
This helps it produce a single instruction for the saturate, as opposed to having to scalarize.
1 parent 5d24217 commit 78f286c

File tree

5 files changed

+27
-51
lines changed

5 files changed

+27
-51
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4273,6 +4273,15 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
42734273
return SDValue();
42744274

42754275
SDLoc DL(Op);
4276+
// Expand to f64 if we are saturating to i64, to help keep the lanes
4277+
// the same width and produce a single vector fcvtzs/fcvtzu.
4278+
if (SatWidth == 64 && SrcElementWidth < 64) {
4279+
MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
4280+
SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
4281+
SrcVT = F64VT;
4282+
SrcElementVT = MVT::f64;
4283+
SrcElementWidth = 64;
4284+
}
42764285
// Cases that we can emit directly.
42774286
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
42784287
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,

llvm/test/CodeGen/AArch64/fcvt_combine.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -345,11 +345,8 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
345345
; CHECK: // %bb.0:
346346
; CHECK-NEXT: fmov v1.2s, #16.00000000
347347
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
348-
; CHECK-NEXT: mov s1, v0.s[1]
349-
; CHECK-NEXT: fcvtzs x8, s0
350-
; CHECK-NEXT: fcvtzs x9, s1
351-
; CHECK-NEXT: fmov d0, x8
352-
; CHECK-NEXT: mov v0.d[1], x9
348+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
349+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
353350
; CHECK-NEXT: ret
354351
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
355352
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)

llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -436,12 +436,8 @@ entry:
436436
define <2 x i64> @stest_f32i64(<2 x float> %x) {
437437
; CHECK-LABEL: stest_f32i64:
438438
; CHECK: // %bb.0: // %entry
439-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
440-
; CHECK-NEXT: mov s1, v0.s[1]
441-
; CHECK-NEXT: fcvtzs x8, s0
442-
; CHECK-NEXT: fcvtzs x9, s1
443-
; CHECK-NEXT: fmov d0, x8
444-
; CHECK-NEXT: mov v0.d[1], x9
439+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
440+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
445441
; CHECK-NEXT: ret
446442
entry:
447443
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -1056,12 +1052,8 @@ entry:
10561052
define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
10571053
; CHECK-LABEL: stest_f32i64_mm:
10581054
; CHECK: // %bb.0: // %entry
1059-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1060-
; CHECK-NEXT: mov s1, v0.s[1]
1061-
; CHECK-NEXT: fcvtzs x8, s0
1062-
; CHECK-NEXT: fcvtzs x9, s1
1063-
; CHECK-NEXT: fmov d0, x8
1064-
; CHECK-NEXT: mov v0.d[1], x9
1055+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
1056+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
10651057
; CHECK-NEXT: ret
10661058
entry:
10671059
%conv = fptosi <2 x float> %x to <2 x i128>

llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -793,12 +793,8 @@ define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
793793
define <2 x i64> @test_signed_v2f32_v2i64(<2 x float> %f) {
794794
; CHECK-LABEL: test_signed_v2f32_v2i64:
795795
; CHECK: // %bb.0:
796-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
797-
; CHECK-NEXT: mov s1, v0.s[1]
798-
; CHECK-NEXT: fcvtzs x8, s0
799-
; CHECK-NEXT: fcvtzs x9, s1
800-
; CHECK-NEXT: fmov d0, x8
801-
; CHECK-NEXT: mov v0.d[1], x9
796+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
797+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
802798
; CHECK-NEXT: ret
803799
%x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f)
804800
ret <2 x i64> %x
@@ -1060,17 +1056,10 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
10601056
define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
10611057
; CHECK-LABEL: test_signed_v4f32_v4i64:
10621058
; CHECK: // %bb.0:
1063-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1064-
; CHECK-NEXT: mov s3, v0.s[1]
1065-
; CHECK-NEXT: fcvtzs x9, s0
1066-
; CHECK-NEXT: mov s2, v1.s[1]
1067-
; CHECK-NEXT: fcvtzs x8, s1
1068-
; CHECK-NEXT: fcvtzs x11, s3
1069-
; CHECK-NEXT: fmov d0, x9
1070-
; CHECK-NEXT: fcvtzs x10, s2
1071-
; CHECK-NEXT: fmov d1, x8
1072-
; CHECK-NEXT: mov v0.d[1], x11
1073-
; CHECK-NEXT: mov v1.d[1], x10
1059+
; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
1060+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
1061+
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
1062+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
10741063
; CHECK-NEXT: ret
10751064
%x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
10761065
ret <4 x i64> %x

llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -707,12 +707,8 @@ define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
707707
define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
708708
; CHECK-LABEL: test_unsigned_v2f32_v2i64:
709709
; CHECK: // %bb.0:
710-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
711-
; CHECK-NEXT: mov s1, v0.s[1]
712-
; CHECK-NEXT: fcvtzu x8, s0
713-
; CHECK-NEXT: fcvtzu x9, s1
714-
; CHECK-NEXT: fmov d0, x8
715-
; CHECK-NEXT: mov v0.d[1], x9
710+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
711+
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
716712
; CHECK-NEXT: ret
717713
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
718714
ret <2 x i64> %x
@@ -927,17 +923,10 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
927923
define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
928924
; CHECK-LABEL: test_unsigned_v4f32_v4i64:
929925
; CHECK: // %bb.0:
930-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
931-
; CHECK-NEXT: mov s3, v0.s[1]
932-
; CHECK-NEXT: fcvtzu x9, s0
933-
; CHECK-NEXT: mov s2, v1.s[1]
934-
; CHECK-NEXT: fcvtzu x8, s1
935-
; CHECK-NEXT: fcvtzu x11, s3
936-
; CHECK-NEXT: fmov d0, x9
937-
; CHECK-NEXT: fcvtzu x10, s2
938-
; CHECK-NEXT: fmov d1, x8
939-
; CHECK-NEXT: mov v0.d[1], x11
940-
; CHECK-NEXT: mov v1.d[1], x10
926+
; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
927+
; CHECK-NEXT: fcvtl v0.2d, v0.2s
928+
; CHECK-NEXT: fcvtzu v1.2d, v1.2d
929+
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
941930
; CHECK-NEXT: ret
942931
%x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
943932
ret <4 x i64> %x

0 commit comments

Comments
 (0)