Revert SVML support for sqrt

WenleiHe · WenleiHe · commit 89e8a8b223b2 · 2020-10-05T08:13:11.000-07:00
As was brought up in D87169 by @craig.topper we shouldn't map llvm.sqrt to svml since there is a faster native instruction. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_p&expand=5824,5823,5356,5823,5825,5365,5356 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D88620
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -309,14 +309,6 @@ TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
 TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
 TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
 
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt8", 8)
-
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf16", 16)
-
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -40,8 +40,6 @@ declare float @llvm.log10.f32(float) #0
 
 declare double @sqrt(double) #0
 declare float @sqrtf(float) #0
-declare double @llvm.sqrt.f64(double) #0
-declare float @llvm.sqrt.f32(float) #0
 
 declare double @exp2(double) #0
 declare float @exp2f(float) #0
@@ -746,52 +744,6 @@ for.end:
   ret void
 }
 
-define void @sqrt_f64_intrinsic(double* nocapture %varray) {
-; CHECK-LABEL: @sqrt_f64_intrinsic(
-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
-; CHECK:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %tmp = trunc i64 %iv to i32
-  %conv = sitofp i32 %tmp to double
-  %call = tail call double @llvm.sqrt.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-define void @sqrt_f32_intrinsic(float* nocapture %varray) {
-; CHECK-LABEL: @sqrt_f32_intrinsic(
-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
-; CHECK:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %tmp = trunc i64 %iv to i32
-  %conv = sitofp i32 %tmp to float
-  %call = tail call float @llvm.sqrt.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
 define void @exp2_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])