Skip to content

Commit 164d464

Browse files
committed
[Analysis] getIntrinsicForCallSite - add vectorization support for acos/asin/atan and cosh/sinh/tanh libcalls
Follow-up to #106584 - ensure the acos/asin/atan and cosh/sinh/tanh libcalls correctly map to their LLVM intrinsic equivalents
1 parent 5dcea46 commit 164d464

File tree

6 files changed

+131
-119
lines changed

6 files changed

+131
-119
lines changed

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,12 @@ class TargetLibraryInfo {
413413
case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
414414
case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
415415
case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl:
416+
case LibFunc_asin: case LibFunc_asinf: case LibFunc_asinl:
417+
case LibFunc_acos: case LibFunc_acosf: case LibFunc_acosl:
418+
case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanl:
419+
case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl:
420+
case LibFunc_cosh: case LibFunc_coshf: case LibFunc_coshl:
421+
case LibFunc_tanh: case LibFunc_tanhf: case LibFunc_tanhl:
416422
case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
417423
case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
418424
case LibFunc_sqrtl_finite:

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,18 @@ class TargetTransformInfoImplBase {
160160
// These will all likely lower to a single selection DAG node.
161161
// clang-format off
162162
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
163-
Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
164-
Name == "fmin" || Name == "fminf" || Name == "fminl" ||
165-
Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
166-
Name == "sin" || Name == "sinf" || Name == "sinl" ||
167-
Name == "cos" || Name == "cosf" || Name == "cosl" ||
168-
Name == "tan" || Name == "tanf" || Name == "tanl" ||
163+
Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
164+
Name == "fmin" || Name == "fminf" || Name == "fminl" ||
165+
Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
166+
Name == "sin" || Name == "sinf" || Name == "sinl" ||
167+
Name == "cos" || Name == "cosf" || Name == "cosl" ||
168+
Name == "tan" || Name == "tanf" || Name == "tanl" ||
169+
Name == "asin" || Name == "asinf" || Name == "asinl" ||
170+
Name == "acos" || Name == "acosf" || Name == "acosl" ||
171+
Name == "atan" || Name == "atanf" || Name == "atanl" ||
172+
Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
173+
Name == "cosh" || Name == "coshf" || Name == "coshl" ||
174+
Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
169175
Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
170176
return false;
171177
// clang-format on

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4132,6 +4132,30 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
41324132
case LibFunc_tanf:
41334133
case LibFunc_tanl:
41344134
return Intrinsic::tan;
4135+
case LibFunc_asin:
4136+
case LibFunc_asinf:
4137+
case LibFunc_asinl:
4138+
return Intrinsic::asin;
4139+
case LibFunc_acos:
4140+
case LibFunc_acosf:
4141+
case LibFunc_acosl:
4142+
return Intrinsic::acos;
4143+
case LibFunc_atan:
4144+
case LibFunc_atanf:
4145+
case LibFunc_atanl:
4146+
return Intrinsic::atan;
4147+
case LibFunc_sinh:
4148+
case LibFunc_sinhf:
4149+
case LibFunc_sinhl:
4150+
return Intrinsic::sinh;
4151+
case LibFunc_cosh:
4152+
case LibFunc_coshf:
4153+
case LibFunc_coshl:
4154+
return Intrinsic::cosh;
4155+
case LibFunc_tanh:
4156+
case LibFunc_tanhf:
4157+
case LibFunc_tanhl:
4158+
return Intrinsic::tanh;
41354159
case LibFunc_exp:
41364160
case LibFunc_expf:
41374161
case LibFunc_expl:

llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -398,10 +398,10 @@ for.end:
398398

399399
define void @acos_f64(ptr nocapture %varray) {
400400
; CHECK-LABEL: @acos_f64(
401-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
402-
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
403-
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
404-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
401+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
402+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
403+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
404+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
405405
; CHECK: ret void
406406
;
407407
entry:
@@ -424,7 +424,7 @@ for.end:
424424

425425
define void @acos_f32(ptr nocapture %varray) {
426426
; CHECK-LABEL: @acos_f32(
427-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
427+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
428428
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
429429
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
430430
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -502,10 +502,10 @@ for.end:
502502

503503
define void @asin_f64(ptr nocapture %varray) {
504504
; CHECK-LABEL: @asin_f64(
505-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
506-
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
505+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
506+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
507507
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
508-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
508+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
509509
; CHECK: ret void
510510
;
511511
entry:
@@ -528,7 +528,7 @@ for.end:
528528

529529
define void @asin_f32(ptr nocapture %varray) {
530530
; CHECK-LABEL: @asin_f32(
531-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
531+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
532532
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
533533
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
534534
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -609,7 +609,7 @@ define void @atan_f64(ptr nocapture %varray) {
609609
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
610610
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
611611
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
612-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
612+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
613613
; CHECK: ret void
614614
;
615615
entry:
@@ -632,7 +632,7 @@ for.end:
632632

633633
define void @atan_f32(ptr nocapture %varray) {
634634
; CHECK-LABEL: @atan_f32(
635-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
635+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
636636
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
637637
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
638638
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -710,10 +710,10 @@ for.end:
710710

711711
define void @sinh_f64(ptr nocapture %varray) {
712712
; CHECK-LABEL: @sinh_f64(
713-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
714-
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
715-
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
716-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
713+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
714+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
715+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
716+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
717717
; CHECK: ret void
718718
;
719719
entry:
@@ -736,10 +736,10 @@ for.end:
736736

737737
define void @sinh_f32(ptr nocapture %varray) {
738738
; CHECK-LABEL: @sinh_f32(
739-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
740-
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
741-
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
742-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
739+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
740+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
741+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
742+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
743743
; CHECK: ret void
744744
;
745745
entry:
@@ -815,9 +815,9 @@ for.end:
815815
define void @cosh_f64(ptr nocapture %varray) {
816816
; CHECK-LABEL: @cosh_f64(
817817
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
818-
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
819-
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
820-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
818+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
819+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
820+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
821821
; CHECK: ret void
822822
;
823823
entry:
@@ -840,10 +840,10 @@ for.end:
840840

841841
define void @cosh_f32(ptr nocapture %varray) {
842842
; CHECK-LABEL: @cosh_f32(
843-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
843+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
844844
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
845845
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
846-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
846+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
847847
; CHECK: ret void
848848
;
849849
entry:
@@ -918,10 +918,10 @@ for.end:
918918

919919
define void @tanh_f64(ptr nocapture %varray) {
920920
; CHECK-LABEL: @tanh_f64(
921-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
922-
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
923-
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
924-
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
921+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
922+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
923+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
924+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
925925
; CHECK: ret void
926926
;
927927
entry:
@@ -944,7 +944,7 @@ for.end:
944944

945945
define void @tanh_f32(ptr nocapture %varray) {
946946
; CHECK-LABEL: @tanh_f32(
947-
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
947+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
948948
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
949949
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
950950
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])

0 commit comments

Comments
 (0)