Skip to content

Commit efc6b50

Browse files
authored
1 parent 80ff391 commit efc6b50

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1371,7 +1371,9 @@ TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_
13711371

13721372
TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13731373
TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1374+
TLI_DEFINE_VECFUNC("acosf", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13741375

1376+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13751377
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13761378
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13771379

@@ -1389,17 +1391,21 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LL
13891391
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13901392
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13911393

1394+
TLI_DEFINE_VECFUNC("cosh", "amd_vrd2_cosh" , FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13921395
TLI_DEFINE_VECFUNC("coshf", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13931396
TLI_DEFINE_VECFUNC("coshf", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13941397

1398+
TLI_DEFINE_VECFUNC("llvm.cosh.f64", "amd_vrd2_cosh" , FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13951399
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13961400
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13971401

13981402
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13991403
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1404+
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
14001405

14011406
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
14021407
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1408+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
14031409

14041410
TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
14051411
TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -412,6 +412,10 @@ define void @acos_f32(ptr nocapture %varray) {
412412
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
413413
; CHECK: ret void
414414
;
415+
; CHECK-AVX512-VF16-LABEL: @acos_f32(
416+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
417+
; CHECK-AVX512-VF16: ret void
418+
;
415419
entry:
416420
br label %for.body
417421

@@ -435,6 +439,10 @@ define void @acos_f32_intrinsic(ptr nocapture %varray) {
435439
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
436440
; CHECK: ret void
437441
;
442+
; CHECK-AVX512-VF16-LABEL: @acos_f32_intrinsic(
443+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
444+
; CHECK-AVX512-VF16: ret void
445+
;
438446
entry:
439447
br label %for.body
440448

@@ -669,6 +677,29 @@ for.end:
669677
ret void
670678
}
671679

680+
; Loop-vectorizer test input: a 1000-iteration scalar loop that stores
; cosh((double)(i32)iv) into %varray[iv] through the libm-style @cosh call.
; The AVX VF2 FileCheck lines below expect the vectorized output to call
; @amd_vrd2_cosh on <2 x double>. The RUN line that enables that prefix is
; outside this view -- TODO confirm it selects the AMD vector library at VF=2.
define void @cosh_f64(ptr nocapture %varray) {
681+
; CHECK-AVX-VF2-LABEL: @cosh_f64(
682+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
683+
; CHECK-AVX-VF2: ret void
684+
;
685+
entry:
686+
br label %for.body
687+
688+
for.body:
689+
; %iv is the induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
690+
; The call argument is the loop index itself, narrowed to i32 and converted to double.
%tmp = trunc i64 %iv to i32
691+
%conv = sitofp i32 %tmp to double
692+
; Scalar call that the test expects to be widened to the vector routine.
%call = tail call double @cosh(double %conv)
693+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
694+
; NOTE(review): align 4 is smaller than the 8-byte size of double; legal in IR,
; presumably mirrors the other tests in this file -- confirm it is intentional.
store double %call, ptr %arrayidx, align 4
695+
%iv.next = add nuw nsw i64 %iv, 1
696+
; Exit once %iv.next reaches 1000, i.e. after iterations 0..999.
%exitcond = icmp eq i64 %iv.next, 1000
697+
br i1 %exitcond, label %for.end, label %for.body
698+
699+
for.end:
700+
ret void
701+
}
702+
672703
define void @cosh_f32(ptr nocapture %varray) {
673704
; CHECK-LABEL: @cosh_f32(
674705
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
@@ -692,6 +723,29 @@ for.end:
692723
ret void
693724
}
694725

726+
; Loop-vectorizer test input: same 1000-iteration loop shape as the libm-call
; variant, but the scalar call goes through the @llvm.cosh.f64 intrinsic.
; The AVX VF2 FileCheck lines below expect the vectorized output to call
; @amd_vrd2_cosh on <2 x double>. The RUN line that enables that prefix is
; outside this view -- TODO confirm it selects the AMD vector library at VF=2.
define void @cosh_f64_intrinsic(ptr nocapture %varray) {
727+
; CHECK-AVX-VF2-LABEL: @cosh_f64_intrinsic(
728+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
729+
; CHECK-AVX-VF2: ret void
730+
;
731+
entry:
732+
br label %for.body
733+
734+
for.body:
735+
; %iv is the induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
736+
; The call argument is the loop index itself, narrowed to i32 and converted to double.
%tmp = trunc i64 %iv to i32
737+
%conv = sitofp i32 %tmp to double
738+
; Intrinsic form of the call; the test expects the same vector routine as for @cosh.
%call = tail call double @llvm.cosh.f64(double %conv)
739+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
740+
; NOTE(review): align 4 is smaller than the 8-byte size of double; legal in IR,
; presumably mirrors the other tests in this file -- confirm it is intentional.
store double %call, ptr %arrayidx, align 4
741+
%iv.next = add nuw nsw i64 %iv, 1
742+
; Exit once %iv.next reaches 1000, i.e. after iterations 0..999.
%exitcond = icmp eq i64 %iv.next, 1000
743+
br i1 %exitcond, label %for.end, label %for.body
744+
745+
for.end:
746+
ret void
747+
}
748+
695749
define void @cosh_f32_intrinsic(ptr nocapture %varray) {
696750
; CHECK-LABEL: @cosh_f32_intrinsic(
697751
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
@@ -720,6 +774,10 @@ define void @tanh_f32(ptr nocapture %varray) {
720774
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
721775
; CHECK: ret void
722776
;
777+
; CHECK-AVX512-VF16-LABEL: @tanh_f32(
778+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
779+
; CHECK-AVX512-VF16: ret void
780+
;
723781
entry:
724782
br label %for.body
725783

@@ -743,6 +801,10 @@ define void @tanh_f32_intrinsic(ptr nocapture %varray) {
743801
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
744802
; CHECK: ret void
745803
;
804+
; CHECK-AVX512-VF16-LABEL: @tanh_f32_intrinsic(
805+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
806+
; CHECK-AVX512-VF16: ret void
807+
;
746808
entry:
747809
br label %for.body
748810

0 commit comments

Comments
 (0)