Skip to content

Commit e9f5555

Browse files
rohitaggarwal007Rohit Aggarwal
authored andcommitted
Adding more vector calls for -fveclib=AMDLIBM (llvm#109662)
AMD has it's own implementation of vector calls. New vector calls are introduced in the library for exp10, log10, sincos and finite asin/acos Please refer [https://github.com/amd/aocl-libm-ose] --------- Co-authored-by: Rohit Aggarwal <[email protected]>
1 parent de109db commit e9f5555

File tree

8 files changed

+455
-96
lines changed

8 files changed

+455
-96
lines changed

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ class TargetLibraryInfo {
416416
case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
417417
case LibFunc_cosh: case LibFunc_coshf: case LibFunc_coshl:
418418
case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
419+
case LibFunc_exp10: case LibFunc_exp10f: case LibFunc_exp10l:
419420
case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
420421
case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl:
421422
case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl:

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,8 @@ class TargetTransformInfoImplBase {
177177
Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
178178
Name == "cosh" || Name == "coshf" || Name == "coshl" ||
179179
Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
180-
Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
180+
Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ||
181+
Name == "exp10" || Name == "exp10l" || Name == "exp10f")
181182
return false;
182183
// clang-format on
183184
// These are all likely to be optimized into something smaller.

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,14 +1328,17 @@ TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd2_log2", FIXED(2), NOMASK, "_ZGV_LLV
13281328
TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd4_log2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13291329
TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd8_log2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13301330

1331+
TLI_DEFINE_VECFUNC("log10", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13311332
TLI_DEFINE_VECFUNC("log10f", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13321333
TLI_DEFINE_VECFUNC("log10f", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13331334
TLI_DEFINE_VECFUNC("log10f", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13341335

1336+
TLI_DEFINE_VECFUNC("__log10_finite", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13351337
TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13361338
TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13371339
TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13381340

1341+
TLI_DEFINE_VECFUNC("llvm.log10.f64", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13391342
TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13401343
TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13411344
TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
@@ -1350,6 +1353,12 @@ TLI_DEFINE_VECFUNC("erf", "amd_vrd8_erf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13501353
TLI_DEFINE_VECFUNC("exp10", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13511354
TLI_DEFINE_VECFUNC("exp10f", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13521355

1356+
TLI_DEFINE_VECFUNC("__exp10_finite", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1357+
TLI_DEFINE_VECFUNC("__exp10f_finite", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1358+
1359+
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1360+
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1361+
13531362
TLI_DEFINE_VECFUNC("expm1", "amd_vrd2_expm1", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13541363
TLI_DEFINE_VECFUNC("expm1f", "amd_vrs4_expm1f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13551364

@@ -1380,10 +1389,19 @@ TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LL
13801389
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13811390
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13821391

1392+
TLI_DEFINE_VECFUNC("__asin_finite", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1393+
TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1394+
TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1395+
TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1396+
13831397
TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13841398
TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13851399
TLI_DEFINE_VECFUNC("acosf", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13861400

1401+
TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1402+
TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1403+
TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1404+
13871405
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13881406
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13891407
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
@@ -1421,6 +1439,12 @@ TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_
14211439
TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
14221440
TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
14231441

1442+
TLI_DEFINE_VECFUNC("sincos", "amd_vrd4_sincos", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl8l8")
1443+
TLI_DEFINE_VECFUNC("sincos", "amd_vrd8_sincos", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl8l8")
1444+
1445+
TLI_DEFINE_VECFUNC("sincosf", "amd_vrs4_sincosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
1446+
TLI_DEFINE_VECFUNC("sincosf", "amd_vrs8_sincosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl4l4")
1447+
TLI_DEFINE_VECFUNC("sincosf", "amd_vrs16_sincosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16vl4l4")
14241448
#else
14251449
#error "Must choose which vector library functions are to be defined."
14261450
#endif

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4241,6 +4241,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
42414241
case LibFunc_exp2f:
42424242
case LibFunc_exp2l:
42434243
return Intrinsic::exp2;
4244+
case LibFunc_exp10:
4245+
case LibFunc_exp10f:
4246+
case LibFunc_exp10l:
4247+
return Intrinsic::exp10;
42444248
case LibFunc_log:
42454249
case LibFunc_logf:
42464250
case LibFunc_logl:

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
7676
case Intrinsic::cosh:
7777
case Intrinsic::tanh:
7878
case Intrinsic::exp:
79+
case Intrinsic::exp10:
7980
case Intrinsic::exp2:
8081
case Intrinsic::log:
8182
case Intrinsic::log10:

0 commit comments

Comments
 (0)