[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear args. #76060

Merged

labrinea merged 1 commit into llvm:main from the tli-mappings-with-linear-args branch on Jan 5, 2024

Conversation

labrinea (Collaborator) commented Dec 20, 2023

The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi.
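
For readers less familiar with the Vector Function ABI name mangling these entries use, the sketch below gives an informal reading of one of the new mappings (the entry itself is taken verbatim from VecFuncs.def in the diff; the decoding is an assumption based on the usual AArch64 VFABI rules, not a quote of the spec):

// Informal decoding of one of the new entries:
//
//   TLI_DEFINE_VECFUNC("modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
//
//   _ZGVnN2vl8_modf
//   _ZGV            VFABI prefix
//       n           AdvSIMD (NEON) ISA
//        N          unmasked variant
//         2         two lanes per call
//          v        first parameter is widened to a vector of doubles
//           l8      second parameter is linear, stepping 8 bytes per lane
//                   (the double* out-pointer of modf; 'l4' for the float
//                   variants)
//             _modf scalar name
//
// The SVE entries use the 's' ISA letter and 'Mx' for a masked, scalable
// lane count, e.g. _ZGVsMxvl8_modf, with a trailing predicate operand.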

@llvmbot added the vectorizers, llvm:analysis (value tracking, cost tables and constant folding), and llvm:transforms labels on Dec 20, 2023
llvmbot (Member) commented Dec 20, 2023

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Alexandros Lamprineas (labrinea)

Changes

[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear arguments.

The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi.


Patch is 37.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76060.diff

4 Files Affected:

  • (modified) llvm/include/llvm/Analysis/VecFuncs.def (+36)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+2-1)
  • (added) llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll (+275)
  • (modified) llvm/test/Transforms/Util/add-TLI-mappings.ll (+122-2)
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 4bffcdee6f9caa..ee9207bb4f7dc1 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -506,12 +506,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
 
+TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
+
 TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
 TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
 
 TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
 
+TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+
+TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+
 TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
 
 TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
@@ -560,12 +566,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v"
 TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4")
+
 TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
 TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
 
 TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4")
+
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4")
+
 TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
 
 TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -637,6 +649,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
+TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+
 TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
 TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv")
 TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -647,6 +662,12 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
+TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
 TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh",  SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
 
@@ -834,6 +855,11 @@ TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZG
 TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8")
+TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4")
+TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxvl8")
+TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+
 TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
 TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
 TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -859,6 +885,16 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK, "_ZGV_LL
 TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
+TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
+TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
+TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
 TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 02e400d590bed4..1ca9556adad6ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -504,7 +504,8 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     SmallVector<Type *, 2> TysForDecl;
     // Add return type if intrinsic is overloaded on it.
-    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
+    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1) &&
+        VectorIntrinsicID != Intrinsic::not_intrinsic) {
       TysForDecl.push_back(
           VectorType::get(CI.getType()->getScalarType(), State.VF));
     }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
new file mode 100644
index 00000000000000..ba32236f275023
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(modf|modff|sincos|sincosf|sincospi|sincospif)" --version 4
+
+; RUN: opt < %s -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-NEON
+; RUN: opt < %s -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-SVE
+; RUN: opt < %s -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-NEON
+; RUN: opt < %s -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-SVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_modf(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_modf(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SLEEF-NEON:    [[TMP8:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]])
+; SLEEF-NEON:    [[TMP9:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]])
+; SLEEF-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_modf(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SLEEF-SVE:    [[TMP26:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    [[TMP27:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_modf(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; ARMPL-NEON:    [[TMP8:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]])
+; ARMPL-NEON:    [[TMP9:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]])
+; ARMPL-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_modf(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; ARMPL-SVE:    [[TMP26:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    [[TMP27:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+  %num = load double, ptr %gepa, align 8
+  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+  %data = call double @modf(double %num, ptr %gepb)
+  %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv
+  store double %data, ptr %gepc, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_modff(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_modff(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[TMP8:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]])
+; SLEEF-NEON:    [[TMP9:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]])
+; SLEEF-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_modff(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[TMP26:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    [[TMP27:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_modff(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP8:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]])
+; ARMPL-NEON:    [[TMP9:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]])
+; ARMPL-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_modff(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP26:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; ARMPL-SVE:    [[TMP27:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; ARMPL-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
+  %num = load float, ptr %gepa, align 8
+  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
+  %data = call float @modff(float %num, ptr %gepb)
+  %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  store float %data, ptr %gepc, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_sincos(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_sincos(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]], <2 x ptr> [[TMP8:%.*]])
+; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]], <2 x ptr> [[TMP9:%.*]])
+; SLEEF-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_sincos(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x ptr> [[TMP26:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x ptr> [[TMP27:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_sincos(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]], <2 x ptr> [[TMP8:%.*]])
+; ARMPL-NEON:    call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]], <2 x ptr> [[TMP9:%.*]])
+; ARMPL-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_sincos(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x ptr> [[TMP26:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x ptr> [[TMP27:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+  %num = load double, ptr %gepa, align 8
+  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
+  call void @sincos(double %num, ptr %gepb, ptr %gepc)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_sincosf(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_sincosf(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]], <4 x ptr> [[TMP8:%.*]])
+; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]], <4 x ptr> [[TMP9:%.*]])
+; SLEEF-NEON:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_sincosf(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x ptr> [[TMP26:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x ptr> [[TMP27:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR5:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_sincosf(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]], <4 x ptr> [[T...
[truncated]
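
As a rough orientation for the IR checks above, the scalar pattern the new test exercises corresponds approximately to the following C++ (an illustrative sketch only, not part of the patch; whether a compiler actually emits the vector variants depends on the chosen vector library and optimization flags):

#include <cmath>

extern "C" void sincos(double x, double *s, double *c); // GNU libm extension, declared here for the sketch

// Mirrors @test_modf: the out-pointer &b[i] advances with the loop index,
// i.e. it is a linear argument, so the call is mappable to _ZGVnN2vl8_modf
// or armpl_vmodfq_f64.
void test_modf(double *a, double *b, double *c, long n) {
  for (long i = 0; i < n; ++i)
    c[i] = std::modf(a[i], &b[i]);
}

// Mirrors @test_sincos: both out-pointers are linear ('l8l8'), mapping to
// _ZGVnN2vl8l8_sincos or armpl_vsincosq_f64.
void test_sincos(double *a, double *b, double *c, long n) {
  for (long i = 0; i < n; ++i)
    sincos(a[i], &b[i], &c[i]);
}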

mgabka (Contributor) commented Dec 20, 2023

Could you also add tests for the replace-with-veclib pass, which uses these mappings, to:
llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

labrinea added a commit to labrinea/llvm-project that referenced this pull request Dec 22, 2023
This patch prepares the ground for llvm#76060.

* Unifies ArmPL and SLEEF tests for better coverage
* Replaces deprecated float* and double* types with ptr
* Adds noalias attribute to pointer arguments
* Adds some cmd-line options to the RUN lines to simplify output
* Removes datalayout since target triple is provided
* Removes checks for return statements
* Refactors the regex filter for autogenerated checks
* Removes redundant test file suffix (already under the AArch64 dir)
labrinea added a commit that referenced this pull request Dec 22, 2023
This patch prepares the ground for #76060.

* Unifies ArmPL and SLEEF tests for better coverage
* Replaces deprecated float* and double* types with ptr
* Adds noalias attribute to pointer arguments
* Adds some cmd-line options to the RUN lines to simplify output
* Removes datalayout since target triple is provided
* Removes checks for return statements
* Refactors the regex filter for autogenerated checks
* Removes redundant test file suffix (already under the AArch64 dir)
labrinea (Collaborator, Author) commented Dec 27, 2023

> could you also add tests for the replace-with-veclib-pass which is using those mapping to: llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

I am not seeing corresponding LLVM intrinsics for the veclib functions I've added, so I don't think anything needs to be done in these test files.

Comment on lines 44 to 45
%gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv
store double %data, ptr %gepc, align 8
Contributor:

this isn't needed

Collaborator (Author):

Without a store we are not using the return value of the call. I see no harm here.

Contributor:

Sure, I just pointed it out as it could simplify the test.

labrinea added a commit to labrinea/llvm-project that referenced this pull request Jan 3, 2024
When creating a declaration for a vector variant, we need to consult the VFABI demangler in order to determine the argument types. This will allow us to add TLI mappings with linear arguments (see llvm#76060).
labrinea added a commit that referenced this pull request Jan 3, 2024
When creating a declaration for a vector variant, we need to consult the VFABI demangler in order to determine the argument types. This will allow us to add TLI mappings with linear arguments (see #76060).
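
For context on the demangler mentioned in these commit messages, below is a minimal sketch of how a pass might walk the parameter kinds of a demangled variant. The VFInfo/VFParamKind types lived in llvm/Analysis/VectorUtils.h at the time; the helper is hypothetical, and the demangler entry point (VFABI::tryDemangleForVFABI) has changed signature across releases, so treat this as an approximation rather than the patch's actual code.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Hypothetical helper: inspect an already-demangled vector variant and react
// to the parameter kinds relevant to the mappings added in this patch.
static void inspectVariant(const VFInfo &Info) {
  for (const VFParameter &Param : Info.Shape.Parameters) {
    switch (Param.ParamKind) {
    case VFParamKind::Vector:
      // Widened per-lane value, e.g. <2 x double> for _ZGVnN2vl8_modf.
      break;
    case VFParamKind::OMP_Linear:
      // Linear argument, i.e. the 'l8'/'l4' tokens in the mangled names.
      break;
    case VFParamKind::GlobalPredicate:
      // Trailing mask operand of the masked (SVE) variants.
      break;
    default:
      break;
    }
  }
}
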
The mappings correspond to vectorized variants (fixed/scalable) for the
math functions: modf, sincos, sincospi.
@labrinea force-pushed the tli-mappings-with-linear-args branch from d77ca23 to db47426 on January 3, 2024 at 16:57
@labrinea changed the title from "[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear argu…" to "[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear args." on Jan 3, 2024
@labrinea merged commit 8c7f10e into llvm:main on Jan 5, 2024
@labrinea deleted the tli-mappings-with-linear-args branch on January 5, 2024 at 11:10