Skip to content

Commit 89e8a8b

Browse files
committed
Revert SVML support for sqrt
As was brought up in D87169 by @craig.topper, we shouldn't map llvm.sqrt to SVML, since there is a faster native instruction (see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_p&expand=5824,5823,5356,5823,5825,5365,5356).

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D88620
1 parent eaf7329 commit 89e8a8b

File tree

2 files changed

+0
-56
lines changed

2 files changed

+0
-56
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -309,14 +309,6 @@ TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
309309
TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
310310
TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
311311

312-
TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt2", 2)
313-
TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt4", 4)
314-
TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt8", 8)
315-
316-
TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf4", 4)
317-
TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf8", 8)
318-
TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf16", 16)
319-
320312
TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
321313
TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
322314
TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)

llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll

Lines changed: 0 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,6 @@ declare float @llvm.log10.f32(float) #0
4040

4141
declare double @sqrt(double) #0
4242
declare float @sqrtf(float) #0
43-
declare double @llvm.sqrt.f64(double) #0
44-
declare float @llvm.sqrt.f32(float) #0
4543

4644
declare double @exp2(double) #0
4745
declare float @exp2f(float) #0
@@ -746,52 +744,6 @@ for.end:
746744
ret void
747745
}
748746

749-
define void @sqrt_f64_intrinsic(double* nocapture %varray) {
750-
; CHECK-LABEL: @sqrt_f64_intrinsic(
751-
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
752-
; CHECK: ret void
753-
;
754-
entry:
755-
br label %for.body
756-
757-
for.body:
758-
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
759-
%tmp = trunc i64 %iv to i32
760-
%conv = sitofp i32 %tmp to double
761-
%call = tail call double @llvm.sqrt.f64(double %conv)
762-
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
763-
store double %call, double* %arrayidx, align 4
764-
%iv.next = add nuw nsw i64 %iv, 1
765-
%exitcond = icmp eq i64 %iv.next, 1000
766-
br i1 %exitcond, label %for.end, label %for.body
767-
768-
for.end:
769-
ret void
770-
}
771-
772-
define void @sqrt_f32_intrinsic(float* nocapture %varray) {
773-
; CHECK-LABEL: @sqrt_f32_intrinsic(
774-
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
775-
; CHECK: ret void
776-
;
777-
entry:
778-
br label %for.body
779-
780-
for.body:
781-
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
782-
%tmp = trunc i64 %iv to i32
783-
%conv = sitofp i32 %tmp to float
784-
%call = tail call float @llvm.sqrt.f32(float %conv)
785-
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
786-
store float %call, float* %arrayidx, align 4
787-
%iv.next = add nuw nsw i64 %iv, 1
788-
%exitcond = icmp eq i64 %iv.next, 1000
789-
br i1 %exitcond, label %for.end, label %for.body
790-
791-
for.end:
792-
ret void
793-
}
794-
795747
define void @exp2_f64(double* nocapture %varray) {
796748
; CHECK-LABEL: @exp2_f64(
797749
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])

0 commit comments

Comments
 (0)