Skip to content

Commit de6c9c8

Browse files
[TLI][AArch64] Add TLI Mappings of @llvm.exp10 for ArmPL and SLEEF.
Update regex to _explicitly_ show which exp versions are added. The previous regex used `exp[^e]` to avoid matching calls like `@llvm.experimental.stepvector`. Note: ArmPL mappings for scalable types (e.g., `llvm.exp10.nxv2f64`, `llvm.exp10.nxv4f32`) are not yet utilized, as the `replace-with-veclib` pass needs improvements.
1 parent cf1bde3 commit de6c9c8

File tree

6 files changed

+232
-23
lines changed

6 files changed

+232
-23
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,7 @@ TLI_DEFINE_VECFUNC( "exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
492492
TLI_DEFINE_VECFUNC( "llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
493493

494494
TLI_DEFINE_VECFUNC( "exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
495+
TLI_DEFINE_VECFUNC( "llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
495496

496497
TLI_DEFINE_VECFUNC( "lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v")
497498

@@ -544,6 +545,7 @@ TLI_DEFINE_VECFUNC( "exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
544545
TLI_DEFINE_VECFUNC( "llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
545546

546547
TLI_DEFINE_VECFUNC( "exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
548+
TLI_DEFINE_VECFUNC( "llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
547549

548550
TLI_DEFINE_VECFUNC( "lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v")
549551

@@ -609,6 +611,8 @@ TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGV
609611

610612
TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
611613
TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
614+
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
615+
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
612616

613617
TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv")
614618
TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv")
@@ -753,6 +757,11 @@ TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
753757
TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
754758
TLI_DEFINE_VECFUNC("exp10f", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
755759

760+
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_vexp10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
761+
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
762+
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
763+
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
764+
756765
TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
757766
TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
758767
TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
1515
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
1616

1717
;.
18-
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [14 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
18+
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
1919
;.
2020
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
2121
; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -192,6 +192,50 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
192192
ret <vscale x 4 x float> %1
193193
}
194194

195+
declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
196+
declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
197+
declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
198+
declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
199+
200+
; Fixed-width case: a `fast` call to @llvm.exp10.v2f64 on a <2 x double>
; argument. The assertions below expect the call to be rewritten into a call
; to @armpl_vexp10q_f64 with the same operand and the `fast` flag preserved.
define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
201+
; CHECK-LABEL: define <2 x double> @llvm_exp10_f64
202+
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
203+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
204+
; CHECK-NEXT: ret <2 x double> [[TMP1]]
205+
;
206+
%1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
207+
ret <2 x double> %1
208+
}
209+
210+
; Fixed-width case: a `fast` call to @llvm.exp10.v4f32 on a <4 x float>
; argument. The assertions below expect the call to be rewritten into a call
; to @armpl_vexp10q_f32 with the same operand and the `fast` flag preserved.
define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
211+
; CHECK-LABEL: define <4 x float> @llvm_exp10_f32
212+
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
213+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
214+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
215+
;
216+
%1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
217+
ret <4 x float> %1
218+
}
219+
220+
; Scalable-vector case: a `fast` call to @llvm.exp10.nxv2f64. The assertions
; below expect the intrinsic call to be left in place (no library call is
; substituted for the <vscale x 2 x double> form in this test).
define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
221+
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
222+
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
223+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN]])
224+
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
225+
;
226+
%1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
227+
ret <vscale x 2 x double> %1
228+
}
229+
230+
; Scalable-vector case: a `fast` call to @llvm.exp10.nxv4f32. The assertions
; below expect the intrinsic call to be left in place (no library call is
; substituted for the <vscale x 4 x float> form in this test).
define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
231+
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
232+
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
233+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN]])
234+
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
235+
;
236+
%1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
237+
ret <vscale x 4 x float> %1
238+
}
195239

196240
declare <2 x double> @llvm.log.v2f64(<2 x double>)
197241
declare <4 x float> @llvm.log.v4f32(<4 x float>)

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,24 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
9595
ret <vscale x 4 x float> %1
9696
}
9797

98+
; Scalable-vector case: a `fast` call to @llvm.exp10.nxv2f64. The assertions
; below expect the intrinsic call to remain unchanged in the output.
define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
99+
; CHECK-LABEL: @llvm_exp10_vscale_f64(
100+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
101+
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
102+
;
103+
%1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
104+
ret <vscale x 2 x double> %1
105+
}
106+
107+
; Scalable-vector case: a `fast` call to @llvm.exp10.nxv4f32. The assertions
; below expect the intrinsic call to remain unchanged in the output.
define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
108+
; CHECK-LABEL: @llvm_exp10_vscale_f32(
109+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
110+
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
111+
;
112+
%1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
113+
ret <vscale x 4 x float> %1
114+
}
115+
98116
define <vscale x 2 x double> @llvm_fabs_vscale_f64(<vscale x 2 x double> %in) {
99117
; CHECK-LABEL: @llvm_fabs_vscale_f64(
100118
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
@@ -375,6 +393,8 @@ declare <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double>)
375393
declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
376394
declare <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double>)
377395
declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
396+
declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
397+
declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
378398
declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
379399
declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
380400
declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>)

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
target triple = "aarch64-unknown-linux-gnu"
55

66
;.
7-
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [14 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
7+
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
88
;.
99
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
1010
; CHECK-LABEL: @llvm_ceil_f64(
@@ -96,6 +96,24 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
9696
ret <4 x float> %1
9797
}
9898

99+
; Fixed-width case: a `fast` call to @llvm.exp10.v2f64 on a <2 x double>
; argument. The assertions below expect the call to be rewritten into a call
; to @_ZGVnN2v_exp10 with the `fast` flag preserved.
define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
100+
; CHECK-LABEL: @llvm_exp10_f64(
101+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
102+
; CHECK-NEXT: ret <2 x double> [[TMP1]]
103+
;
104+
%1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
105+
ret <2 x double> %1
106+
}
107+
108+
; Fixed-width case: a `fast` call to @llvm.exp10.v4f32 on a <4 x float>
; argument. The assertions below expect the call to be rewritten into a call
; to @_ZGVnN4v_exp10f with the `fast` flag preserved.
define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
109+
; CHECK-LABEL: @llvm_exp10_f32(
110+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
111+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
112+
;
113+
%1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
114+
ret <4 x float> %1
115+
}
116+
99117
define <2 x double> @llvm_fabs_f64(<2 x double> %in) {
100118
; CHECK-LABEL: @llvm_fabs_f64(
101119
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[IN:%.*]])
@@ -376,6 +394,8 @@ declare <2 x double> @llvm.exp.v2f64(<2 x double>)
376394
declare <4 x float> @llvm.exp.v4f32(<4 x float>)
377395
declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
378396
declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
397+
declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
398+
declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
379399
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
380400
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
381401
declare <2 x double> @llvm.floor.v2f64(<2 x double>)

llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,57 @@ define void @exp2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
161161
ret void
162162
}
163163

164+
declare double @llvm.exp10.f64(double)
165+
declare float @llvm.exp10.f32(float)
166+
167+
; Scalar loop of 1000 iterations that loads a double from %in.ptr[iv], calls
; @llvm.exp10.f64 on it, and stores the result to %out.ptr[iv].
; The NEON-prefixed assertion expects a <2 x double> call to
; @armpl_vexp10q_f64; the SVE-prefixed assertion expects a
; <vscale x 2 x double> call to @armpl_svexp10_f64_x that also takes a
; <vscale x 2 x i1> operand.
; NOTE(review): the RUN lines that select each prefix are not visible here.
define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
168+
; CHECK-LABEL: @exp10_f64(
169+
; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[TMP4:%.*]])
170+
; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
171+
; CHECK: ret void
172+
;
173+
entry:
174+
br label %for.body
175+
176+
for.body:
177+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
178+
%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
179+
%in = load double, ptr %in.gep, align 8
180+
%call = tail call double @llvm.exp10.f64(double %in)
181+
%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
182+
store double %call, ptr %out.gep, align 8
183+
%iv.next = add nuw nsw i64 %iv, 1
184+
%exitcond = icmp eq i64 %iv.next, 1000
185+
br i1 %exitcond, label %for.end, label %for.body
186+
187+
for.end:
188+
ret void
189+
}
190+
191+
; Scalar loop of 1000 iterations that loads a float from %in.ptr[iv], calls
; @llvm.exp10.f32 on it, and stores the result to %out.ptr[iv].
; The NEON-prefixed assertion expects a <4 x float> call to
; @armpl_vexp10q_f32; the SVE-prefixed assertion expects a
; <vscale x 4 x float> call to @armpl_svexp10_f32_x that also takes a
; <vscale x 4 x i1> operand.
; The load uses `align 4`, matching the store: consecutive float elements are
; 4 bytes apart, so a larger alignment cannot hold on every iteration.
define void @exp10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
192+
; CHECK-LABEL: @exp10_f32(
193+
; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[TMP4:%.*]])
194+
; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
195+
; CHECK: ret void
196+
;
197+
entry:
198+
br label %for.body
199+
200+
for.body:
201+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
202+
%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
203+
%in = load float, ptr %in.gep, align 4
204+
%call = tail call float @llvm.exp10.f32(float %in)
205+
%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
206+
store float %call, ptr %out.gep, align 4
207+
%iv.next = add nuw nsw i64 %iv, 1
208+
%exitcond = icmp eq i64 %iv.next, 1000
209+
br i1 %exitcond, label %for.end, label %for.body
210+
211+
for.end:
212+
ret void
213+
}
214+
164215
declare double @llvm.log.f64(double)
165216
declare float @llvm.log.f32(float)
166217

0 commit comments

Comments
 (0)