Skip to content

Commit 8c7f10e

Browse files
authored
[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear args. (#76060)
The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi.
1 parent b7e50df commit 8c7f10e

File tree

3 files changed

+403
-3
lines changed

3 files changed

+403
-3
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,12 +506,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
506506
TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
507507
TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
508508

509+
// modf, 2 x double, fixed width. The "l8" after "v" in the variant string lines up with the scalar `ptr` parameter of the vector call.
// NOTE(review): the commit describes these as variants "taking linear args"; confirm the exact stride meaning of "l8" against the vector function ABI.
TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
510+
509511
TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
510512
TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
511513

512514
TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
513515
TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
514516

517+
// sincos / sincospi, 2 x double, fixed width. Both scalar functions take two `ptr` parameters, hence the two "l8" tokens in the variant string.
// NOTE(review): confirm the exact stride meaning of "l8" against the vector function ABI.
TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8")
518+
519+
TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8")
520+
515521
TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
516522

517523
TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
@@ -560,12 +566,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v"
560566
TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
561567
TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
562568

569+
// modff, 4 x float, fixed width. Same shape as the double entry, with "l4" instead of "l8" for the `ptr` parameter.
// NOTE(review): confirm the exact stride meaning of "l4" against the vector function ABI.
TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4")
570+
563571
TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
564572
TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
565573

566574
TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
567575
TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
568576

577+
// sincosf / sincospif, 4 x float, fixed width. Both scalar functions take two `ptr` parameters, hence the two "l4" tokens in the variant string.
// NOTE(review): confirm the exact stride meaning of "l4" against the vector function ABI.
TLI_DEFINE_VECFUNC( "sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4")
578+
579+
TLI_DEFINE_VECFUNC( "sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4")
580+
569581
TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
570582

571583
TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -637,6 +649,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
637649
TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv")
638650
TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
639651

652+
// modf / modff, scalable and MASKED: SCALABLE(2) for double, SCALABLE(4) for float. The `ptr` parameter appears as "l8" / "l4" in the variant string.
// NOTE(review): confirm the exact stride meaning of "l8"/"l4" against the vector function ABI.
TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
653+
TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
654+
640655
TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
641656
TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv")
642657
TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -647,6 +662,12 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
647662
TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv")
648663
TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
649664

665+
// sincos / sincospi and their float forms, scalable and MASKED. Each of the two `ptr` parameters appears as one "l8" (double) or "l4" (float) token.
// NOTE(review): confirm the exact stride meaning of "l8"/"l4" against the vector function ABI.
TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
666+
TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
667+
668+
TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
669+
TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
670+
650671
TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv")
651672
TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
652673

@@ -834,6 +855,11 @@ TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZG
834855
TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
835856
TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
836857

858+
// ArmPL modf / modff: two fixed-width NOMASK entries (2 x double, 4 x float) followed by two scalable MASKED entries.
// The variant strings match the ones used for the `_ZGV*`-named modf entries elsewhere in this table.
TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8")
859+
TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4")
860+
TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8")
861+
TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4")
862+
837863
TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
838864
TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
839865
TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -859,6 +885,16 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK, "_ZGV_LL
859885
TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
860886
TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
861887

888+
// ArmPL sincos / sincospi: for each function, two fixed-width NOMASK entries (2 x double, 4 x float) followed by two scalable MASKED entries.
// The variant strings match the ones used for the `_ZGV*`-named sincos/sincospi entries elsewhere in this table.
TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
889+
TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
890+
TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
891+
TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
892+
893+
TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
894+
TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
895+
TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
896+
TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
897+
862898
TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
863899
TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
864900
TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")

llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll

Lines changed: 221 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(cos|sin|tan|cbrt|erf|exp|gamma|log|sqrt|copysign|dim|min|mod|hypot|nextafter|pow|fma)" --version 2
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(cos|sin|tan|cbrt|erf|exp[^e]|gamma|log|sqrt|copysign|dim|min|mod|hypot|nextafter|pow|fma)" --version 2
22
; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON
33
; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE
44
; RUN: opt -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=ARMPL-NEON
@@ -2639,3 +2639,223 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
26392639
ret void
26402640
}
26412641

2642+
; Scalar functions called from the @test_modf and @test_modff loops: one
; floating-point argument, one pointer argument, floating-point return value.
declare double @modf(double, ptr)
2643+
declare float @modff(float, ptr)
2644+
2645+
; test_modf: scalar loop of 1000 iterations. Each iteration loads a double
; from %a[i], calls @modf with %b[i] as the pointer argument and stores the
; returned value to %c[i]. The autogenerated assertions pin the vector call
; expected for each of the four check prefixes (SLEEF/ArmPL x NEON/SVE).
define void @test_modf(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
2646+
; SLEEF-NEON-LABEL: define void @test_modf
2647+
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2648+
; SLEEF-NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
2649+
;
2650+
; SLEEF-SVE-LABEL: define void @test_modf
2651+
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2652+
; SLEEF-SVE: [[TMP23:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
2653+
;
2654+
; ARMPL-NEON-LABEL: define void @test_modf
2655+
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2656+
; ARMPL-NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
2657+
;
2658+
; ARMPL-SVE-LABEL: define void @test_modf
2659+
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2660+
; ARMPL-SVE: [[TMP23:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
2661+
;
2662+
entry:
2663+
br label %for.body
2664+
2665+
for.body:
2666+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2667+
%gepa = getelementptr double, ptr %a, i64 %indvars.iv
2668+
%num = load double, ptr %gepa, align 8
2669+
%gepb = getelementptr double, ptr %b, i64 %indvars.iv
2670+
%data = call double @modf(double %num, ptr %gepb)
2671+
%gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv
2672+
store double %data, ptr %gepc, align 8
2673+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2674+
%exitcond = icmp eq i64 %indvars.iv.next, 1000
2675+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
2676+
2677+
for.cond.cleanup:
2678+
ret void
2679+
}
2680+
2681+
; test_modff: scalar loop of 1000 iterations. Each iteration loads a float
; from %a[i], calls @modff with %b[i] as the pointer argument and stores the
; returned value to %c[i]. The autogenerated assertions pin the vector call
; expected for each of the four check prefixes (SLEEF/ArmPL x NEON/SVE).
; The float load and store use `align 4`: the addresses come from a float GEP,
; so consecutive iterations are 4 bytes apart and a stronger alignment claim
; cannot hold on every iteration.
define void @test_modff(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
2682+
; SLEEF-NEON-LABEL: define void @test_modff
2683+
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2684+
; SLEEF-NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
2685+
;
2686+
; SLEEF-SVE-LABEL: define void @test_modff
2687+
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2688+
; SLEEF-SVE: [[TMP23:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
2689+
;
2690+
; ARMPL-NEON-LABEL: define void @test_modff
2691+
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2692+
; ARMPL-NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
2693+
;
2694+
; ARMPL-SVE-LABEL: define void @test_modff
2695+
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2696+
; ARMPL-SVE: [[TMP23:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
2697+
;
2698+
entry:
2699+
br label %for.body
2700+
2701+
for.body:
2702+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2703+
%gepa = getelementptr float, ptr %a, i64 %indvars.iv
2704+
%num = load float, ptr %gepa, align 4
2705+
%gepb = getelementptr float, ptr %b, i64 %indvars.iv
2706+
%data = call float @modff(float %num, ptr %gepb)
2707+
%gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv
2708+
store float %data, ptr %gepc, align 4
2709+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2710+
%exitcond = icmp eq i64 %indvars.iv.next, 1000
2711+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
2712+
2713+
for.cond.cleanup:
2714+
ret void
2715+
}
2716+
2717+
; Scalar functions called from the @test_sincos and @test_sincosf loops: one
; floating-point argument, two pointer arguments, no return value.
declare void @sincos(double, ptr, ptr)
2718+
declare void @sincosf(float, ptr, ptr)
2719+
2720+
; test_sincos: scalar loop of 1000 iterations. Each iteration loads a double
; from %a[i] and calls @sincos with %b[i] and %c[i] as the two pointer
; arguments. The autogenerated assertions pin the vector call expected for
; each of the four check prefixes (SLEEF/ArmPL x NEON/SVE).
define void @test_sincos(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
2721+
; SLEEF-NEON-LABEL: define void @test_sincos
2722+
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2723+
; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2724+
;
2725+
; SLEEF-SVE-LABEL: define void @test_sincos
2726+
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2727+
; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
2728+
;
2729+
; ARMPL-NEON-LABEL: define void @test_sincos
2730+
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2731+
; ARMPL-NEON: call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2732+
;
2733+
; ARMPL-SVE-LABEL: define void @test_sincos
2734+
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2735+
; ARMPL-SVE: call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
2736+
;
2737+
entry:
2738+
br label %for.body
2739+
2740+
for.body:
2741+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2742+
%gepa = getelementptr double, ptr %a, i64 %indvars.iv
2743+
%num = load double, ptr %gepa, align 8
2744+
%gepb = getelementptr double, ptr %b, i64 %indvars.iv
2745+
%gepc = getelementptr double, ptr %c, i64 %indvars.iv
2746+
call void @sincos(double %num, ptr %gepb, ptr %gepc)
2747+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2748+
%exitcond = icmp eq i64 %indvars.iv.next, 1000
2749+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
2750+
2751+
for.cond.cleanup:
2752+
ret void
2753+
}
2754+
2755+
; test_sincosf: scalar loop of 1000 iterations. Each iteration loads a float
; from %a[i] and calls @sincosf with %b[i] and %c[i] as the two pointer
; arguments. The autogenerated assertions pin the vector call expected for
; each of the four check prefixes (SLEEF/ArmPL x NEON/SVE).
; The float load uses `align 4`: the address comes from a float GEP, so
; consecutive iterations are 4 bytes apart and a stronger alignment claim
; cannot hold on every iteration.
define void @test_sincosf(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
2756+
; SLEEF-NEON-LABEL: define void @test_sincosf
2757+
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2758+
; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2759+
;
2760+
; SLEEF-SVE-LABEL: define void @test_sincosf
2761+
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2762+
; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
2763+
;
2764+
; ARMPL-NEON-LABEL: define void @test_sincosf
2765+
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2766+
; ARMPL-NEON: call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2767+
;
2768+
; ARMPL-SVE-LABEL: define void @test_sincosf
2769+
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2770+
; ARMPL-SVE: call void @armpl_svsincos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
2771+
;
2772+
entry:
2773+
br label %for.body
2774+
2775+
for.body:
2776+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2777+
%gepa = getelementptr float, ptr %a, i64 %indvars.iv
2778+
%num = load float, ptr %gepa, align 4
2779+
%gepb = getelementptr float, ptr %b, i64 %indvars.iv
2780+
%gepc = getelementptr float, ptr %c, i64 %indvars.iv
2781+
call void @sincosf(float %num, ptr %gepb, ptr %gepc)
2782+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2783+
%exitcond = icmp eq i64 %indvars.iv.next, 1000
2784+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
2785+
2786+
for.cond.cleanup:
2787+
ret void
2788+
}
2789+
2790+
; Scalar functions called from the @test_sincospi and @test_sincospif loops:
; one floating-point argument, two pointer arguments, no return value.
declare void @sincospi(double, ptr, ptr)
2791+
declare void @sincospif(float, ptr, ptr)
2792+
2793+
; test_sincospi: scalar loop of 1000 iterations. Each iteration loads a double
; from %a[i] and calls @sincospi with %b[i] and %c[i] as the two pointer
; arguments. The autogenerated assertions pin the vector call expected for
; each of the four check prefixes (SLEEF/ArmPL x NEON/SVE).
define void @test_sincospi(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
2794+
; SLEEF-NEON-LABEL: define void @test_sincospi
2795+
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2796+
; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincospi(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2797+
;
2798+
; SLEEF-SVE-LABEL: define void @test_sincospi
2799+
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2800+
; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincospi(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
2801+
;
2802+
; ARMPL-NEON-LABEL: define void @test_sincospi
2803+
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2804+
; ARMPL-NEON: call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2805+
;
2806+
; ARMPL-SVE-LABEL: define void @test_sincospi
2807+
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2808+
; ARMPL-SVE: call void @armpl_svsincospi_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
2809+
;
2810+
entry:
2811+
br label %for.body
2812+
2813+
for.body:
2814+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2815+
%gepa = getelementptr double, ptr %a, i64 %indvars.iv
2816+
%num = load double, ptr %gepa, align 8
2817+
%gepb = getelementptr double, ptr %b, i64 %indvars.iv
2818+
%gepc = getelementptr double, ptr %c, i64 %indvars.iv
2819+
call void @sincospi(double %num, ptr %gepb, ptr %gepc)
2820+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2821+
%exitcond = icmp eq i64 %indvars.iv.next, 1000
2822+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
2823+
2824+
for.cond.cleanup:
2825+
ret void
2826+
}
2827+
2828+
; test_sincospif: scalar loop of 1000 iterations. Each iteration loads a float
; from %a[i] and calls @sincospif with %b[i] and %c[i] as the two pointer
; arguments. The autogenerated assertions pin the vector call expected for
; each of the four check prefixes (SLEEF/ArmPL x NEON/SVE).
; The float load uses `align 4`: the address comes from a float GEP, so
; consecutive iterations are 4 bytes apart and a stronger alignment claim
; cannot hold on every iteration.
define void @test_sincospif(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
2829+
; SLEEF-NEON-LABEL: define void @test_sincospif
2830+
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2831+
; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincospif(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2832+
;
2833+
; SLEEF-SVE-LABEL: define void @test_sincospif
2834+
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2835+
; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincospif(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
2836+
;
2837+
; ARMPL-NEON-LABEL: define void @test_sincospif
2838+
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2839+
; ARMPL-NEON: call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
2840+
;
2841+
; ARMPL-SVE-LABEL: define void @test_sincospif
2842+
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
2843+
; ARMPL-SVE: call void @armpl_svsincospi_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
2844+
;
2845+
entry:
2846+
br label %for.body
2847+
2848+
for.body:
2849+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2850+
%gepa = getelementptr float, ptr %a, i64 %indvars.iv
2851+
%num = load float, ptr %gepa, align 4
2852+
%gepb = getelementptr float, ptr %b, i64 %indvars.iv
2853+
%gepc = getelementptr float, ptr %c, i64 %indvars.iv
2854+
call void @sincospif(float %num, ptr %gepb, ptr %gepc)
2855+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2856+
%exitcond = icmp eq i64 %indvars.iv.next, 1000
2857+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
2858+
2859+
for.cond.cleanup:
2860+
ret void
2861+
}

0 commit comments

Comments
 (0)