Skip to content

[AArch64] Extend v2i64 fptosi.sat to v2f64 #91714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4286,6 +4286,15 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
return SDValue();

SDLoc DL(Op);
// Extend the source to f64 if we are saturating to i64, to keep the lanes the
// same width and produce a vector fcvtzs/fcvtzu.
if (SatWidth == 64 && SrcElementWidth < 64) {
MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
SrcVT = F64VT;
SrcElementVT = MVT::f64;
SrcElementWidth = 64;
}
// Cases that we can emit directly.
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/AArch64/fcvt_combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,8 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2s, #16.00000000
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)
Expand Down
16 changes: 4 additions & 12 deletions llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -436,12 +436,8 @@ entry:
define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
Expand Down Expand Up @@ -1056,12 +1052,8 @@ entry:
define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64_mm:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
Expand Down
23 changes: 6 additions & 17 deletions llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -793,12 +793,8 @@ define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
define <2 x i64> @test_signed_v2f32_v2i64(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
Expand Down Expand Up @@ -1060,17 +1056,10 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: mov s3, v0.s[1]
; CHECK-NEXT: fcvtzs x9, s0
; CHECK-NEXT: mov s2, v1.s[1]
; CHECK-NEXT: fcvtzs x8, s1
; CHECK-NEXT: fcvtzs x11, s3
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fcvtzs x10, s2
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], x11
; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
Expand Down
23 changes: 6 additions & 17 deletions llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -707,12 +707,8 @@ define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzu x8, s0
; CHECK-NEXT: fcvtzu x9, s1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
Expand Down Expand Up @@ -927,17 +923,10 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: mov s3, v0.s[1]
; CHECK-NEXT: fcvtzu x9, s0
; CHECK-NEXT: mov s2, v1.s[1]
; CHECK-NEXT: fcvtzu x8, s1
; CHECK-NEXT: fcvtzu x11, s3
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fcvtzu x10, s2
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], x11
; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzu v1.2d, v1.2d
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -295,11 +295,8 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; CHECK-LABEL: llrint_v2i64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2s, v0.2s
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -534,11 +534,8 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; CHECK-i64-LABEL: lrint_v2f32:
; CHECK-i64: // %bb.0:
; CHECK-i64-NEXT: frintx v0.2s, v0.2s
; CHECK-i64-NEXT: mov s1, v0.s[1]
; CHECK-i64-NEXT: fcvtzs x8, s0
; CHECK-i64-NEXT: fcvtzs x9, s1
; CHECK-i64-NEXT: fmov d0, x8
; CHECK-i64-NEXT: mov v0.d[1], x9
; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s
; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-i64-NEXT: ret
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
ret <2 x iXLen> %a
Expand Down
Loading
Loading