Skip to content

[AArch64] Add patterns for conversions using fixed-point scvtf #92922

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 29, 2024

Conversation

momchil-velikov
Copy link
Collaborator

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 21, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/92922.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+1-1)
  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+25)
  • (added) llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll (+103)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e31a27e9428e8..dd422214d45f4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14328,7 +14328,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
       unsigned Opc =
           (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
       return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
-                         DAG.getConstant(Cnt, DL, MVT::i32));
+                         DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
     }
 
     // Right shift register.  Note, there is not a shift right register
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a39e3b7be76dc..291f553776752 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -735,6 +735,12 @@ def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
 def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
 
 def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
+
+def AArch64vashr_exact : PatFrag<(ops          node:$lhs, node:$rhs),
+                                 (AArch64vashr node:$lhs, node:$rhs), [{
+  return N->getFlags().hasExact();
+}]>;
+
 def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
 def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
 def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
@@ -7712,6 +7718,25 @@ defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
 defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
 defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
 
+let Predicates = [HasNEON] in {
+def : Pat<(v2f32 (sint_to_fp (v2i32 (AArch64vashr_exact v2i32:$Vn, i32:$shift)))),
+          (SCVTFv2i32_shift $Vn, vecshiftR32:$shift)>;
+
+def : Pat<(v4f32 (sint_to_fp (v4i32 (AArch64vashr_exact v4i32:$Vn, i32:$shift)))),
+          (SCVTFv4i32_shift $Vn, vecshiftR32:$shift)>;
+
+def : Pat<(v2f64 (sint_to_fp (v2i64 (AArch64vashr_exact v2i64:$Vn, i32:$shift)))),
+          (SCVTFv2i64_shift $Vn, vecshiftR64:$shift)>;
+}
+
+let Predicates = [HasNEON, HasFullFP16] in {
+def : Pat<(v4f16 (sint_to_fp (v4i16 (AArch64vashr_exact v4i16:$Vn, i32:$shift)))),
+          (SCVTFv4i16_shift $Vn, vecshiftR16:$shift)>;
+
+def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))),
+          (SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>;
+}
+
 // X << 1 ==> X + X
 class SHLToADDPat<ValueType ty, RegisterClass regtype>
   : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
diff --git a/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
new file mode 100644
index 0000000000000..7141b5b03a1ac
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64"
+
+; First some corner cases
+define <4 x float> @f_v4_s0(<4 x i32> %u) {
+; CHECK-LABEL: f_v4_s0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %s = ashr exact <4 x i32> %u, <i32 0, i32 0, i32 0, i32 0>
+  %v = sitofp <4 x i32> %s to <4 x float>
+  ret <4 x float> %v
+}
+
+define <4 x float> @f_v4_s1(<4 x i32> %u) {
+; CHECK-LABEL: f_v4_s1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf v0.4s, v0.4s, #1
+; CHECK-NEXT:    ret
+  %s = ashr exact <4 x i32> %u, <i32 1, i32 1, i32 1, i32 1>
+  %v = sitofp <4 x i32> %s to <4 x float>
+  ret <4 x float> %v
+}
+
+define <4 x float> @f_v4_s24_inexact(<4 x i32> %u) {
+; CHECK-LABEL: f_v4_s24_inexact:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    scvtf v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %s = ashr <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
+  %v = sitofp <4 x i32> %s to <4 x float>
+  ret <4 x float> %v
+}
+
+define <4 x float> @f_v4_s32(<4 x i32> %u) {
+; CHECK-LABEL: f_v4_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %s = ashr <4 x i32> %u, <i32 32, i32 32, i32 32, i32 32>
+  %v = sitofp <4 x i32> %s to <4 x float>
+  ret <4 x float> %v
+}
+
+; Common cases for conversion from signed integer to floating point types
+define <2 x float> @f_v2_s24(<2 x i32> %u) {
+; CHECK-LABEL: f_v2_s24:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf v0.2s, v0.2s, #24
+; CHECK-NEXT:    ret
+  %s = ashr exact <2 x i32> %u, <i32 24, i32 24>
+  %v = sitofp <2 x i32> %s to <2 x float>
+  ret <2 x float> %v
+}
+
+define <4 x float> @f_v4_s24(<4 x i32> %u) {
+; CHECK-LABEL: f_v4_s24:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf v0.4s, v0.4s, #24
+; CHECK-NEXT:    ret
+  %s = ashr exact <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
+  %v = sitofp <4 x i32> %s to <4 x float>
+  ret <4 x float> %v
+}
+
+; Check legalisation to <2 x f64> does not get in the way
+define <8 x double> @d_v8_s64(<8 x i64> %u) {
+; CHECK-LABEL: d_v8_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf v0.2d, v0.2d, #56
+; CHECK-NEXT:    scvtf v1.2d, v1.2d, #56
+; CHECK-NEXT:    scvtf v2.2d, v2.2d, #56
+; CHECK-NEXT:    scvtf v3.2d, v3.2d, #56
+; CHECK-NEXT:    ret
+  %s = ashr exact <8 x i64> %u, <i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56>
+  %v = sitofp <8 x i64> %s to <8 x double>
+  ret <8 x double> %v
+}
+
+define <4 x half> @h_v4_s8(<4 x i16> %u) #0 {
+; CHECK-LABEL: h_v4_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf v0.4h, v0.4h, #8
+; CHECK-NEXT:    ret
+  %s = ashr exact <4 x i16> %u, <i16 8, i16 8, i16 8, i16 8>
+  %v = sitofp <4 x i16> %s to <4 x half>
+  ret <4 x half> %v
+}
+
+define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
+; CHECK-LABEL: h_v8_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf v0.8h, v0.8h, #8
+; CHECK-NEXT:    ret
+  %s = ashr exact <8 x i16> %u, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %v = sitofp <8 x i16> %s to <8 x half>
+  ret <8 x half> %v
+}
+
+attributes #0 = { "target-features"="+fullfp16"}

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks - This looks great, and I couldn't figure out any way in which it wouldn't match the version with a fmul/fdiv like the pseudo code. LGTM

Change-Id: If19131b160484aba942dbbef042fb67f0b98561d
Change-Id: Ie80a97a0124b057394ba2b8a2f1158591af1af5a
@momchil-velikov momchil-velikov force-pushed the fixed-point-scvtf-pat branch from 27e9e12 to a28dc1b Compare May 28, 2024 13:48
@momchil-velikov momchil-velikov merged commit 1ea8cae into llvm:main May 29, 2024
7 checks passed
@momchil-velikov momchil-velikov deleted the fixed-point-scvtf-pat branch November 13, 2024 09:33
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants