clang: Add __builtin_elementwise_sqrt

arsenm · arsenm · commit 9e3d9c9eae03 · 2023-08-11T19:32:39.000-04:00
This will be used in the opencl builtin headers to provide direct intrinsic access with proper !fpmath metadata. https://reviews.llvm.org/D156737
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
@@ -643,6 +643,8 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_bitreverse(T x)     return the integer represented after reversing the bits of x     integer types
  T __builtin_elementwise_exp(T x)            returns the base-e exponential, e^x, of the specified value      floating point types
  T __builtin_elementwise_exp2(T x)           returns the base-2 exponential, 2^x, of the specified value      floating point types
+
+ T __builtin_elementwise_sqrt(T x)           return the square root of a floating-point number                floating point types
  T __builtin_elementwise_roundeven(T x)      round x to the nearest integer value in floating point format,   floating point types
                                              rounding halfway cases to even (that is, to the nearest value
                                              that is an even integer), regardless of the current rounding
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -239,6 +239,7 @@ Floating Point Support in Clang
 - Add ``__builtin_set_flt_rounds`` builtin for X86, x86_64, Arm and AArch64 only.
 - Add ``__builtin_elementwise_pow`` builtin for floating point types only.
 - Add ``__builtin_elementwise_bitreverse`` builtin for integer types only.
+- Add ``__builtin_elementwise_sqrt`` builtin for floating point types only.
 
 AST Matchers
 ------------
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
@@ -694,6 +694,7 @@ BUILTIN(__builtin_elementwise_round, "v.", "nct")
 BUILTIN(__builtin_elementwise_rint, "v.", "nct")
 BUILTIN(__builtin_elementwise_nearbyint, "v.", "nct")
 BUILTIN(__builtin_elementwise_sin, "v.", "nct")
+BUILTIN(__builtin_elementwise_sqrt, "v.", "nct")
 BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
 BUILTIN(__builtin_elementwise_canonicalize, "v.", "nct")
 BUILTIN(__builtin_elementwise_copysign, "v.", "nct")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2530,7 +2530,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     case Builtin::BI__builtin_sqrtf:
     case Builtin::BI__builtin_sqrtf16:
     case Builtin::BI__builtin_sqrtl:
-    case Builtin::BI__builtin_sqrtf128: {
+    case Builtin::BI__builtin_sqrtf128:
+    case Builtin::BI__builtin_elementwise_sqrt: {
       llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
           *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
       SetSqrtFPAccuracy(Call);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
@@ -2642,6 +2642,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_rint:
   case Builtin::BI__builtin_elementwise_nearbyint:
   case Builtin::BI__builtin_elementwise_sin:
+  case Builtin::BI__builtin_elementwise_sqrt:
   case Builtin::BI__builtin_elementwise_trunc:
   case Builtin::BI__builtin_elementwise_canonicalize: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -588,6 +588,22 @@ void test_builtin_elementwise_sin(float f1, float f2, double d1, double d2,
   vf2 = __builtin_elementwise_sin(vf1);
 }
 
+void test_builtin_elementwise_sqrt(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_sqrt(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.sqrt.f32(float [[F1]])
+  f2 = __builtin_elementwise_sqrt(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.sqrt.f64(double [[D1]])
+  d2 = __builtin_elementwise_sqrt(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_sqrt(vf1);
+}
+
 void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                     float4 vf1, float4 vf2) {
   // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -177,6 +177,16 @@ float4 strict_elementwise_sin(float4 a) {
   return __builtin_elementwise_sin(a);
 }
 
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_sqrtDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float4 strict_elementwise_sqrt(float4 a) {
+  return __builtin_elementwise_sqrt(a);
+}
+
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGenCUDA/correctly-rounded-div.cu b/clang/test/CodeGenCUDA/correctly-rounded-div.cu
@@ -46,4 +46,18 @@ __device__ double dpscalarsqrt(double a) {
   return __builtin_sqrt(a);
 }
 
+// COMMON-LABEL: @_Z28test_builtin_elementwise_f32f
+// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]]
+// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}}
+__device__ float test_builtin_elementwise_f32(float a) {
+  return __builtin_elementwise_sqrt(a);
+}
+
+// COMMON-LABEL: @_Z28test_builtin_elementwise_f64d
+// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}}
+// COMMON-NOT: !fpmath
+__device__ double test_builtin_elementwise_f64(double a) {
+  return __builtin_elementwise_sqrt(a);
+}
+
 // NCRSQRT: ![[MD]] = !{float 2.500000e+00}
diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl
@@ -28,6 +28,21 @@ float spscalarsqrt(float a) {
   return __builtin_sqrtf(a);
 }
 
+float elementwise_sqrt_f32(float a) {
+  // CHECK-LABEL: @elementwise_sqrt_f32
+  // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+  // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+float4 elementwise_sqrt_v4f32(float4 a) {
+  // CHECK-LABEL: @elementwise_sqrt_v4f32
+  // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+  // DIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+
 #if __OPENCL_C_VERSION__ >=120
 void printf(constant char* fmt, ...);
 
@@ -61,6 +76,18 @@ double dpscalarsqrt(double a) {
   return __builtin_sqrt(a);
 }
 
+double elementwise_sqrt_f64(double a) {
+  // CHECK-LABEL: @elementwise_sqrt_f64
+  // CHECK: call double @llvm.sqrt.f64(double %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+double4 elementwise_sqrt_v4f64(double4 a) {
+  // CHECK-LABEL: @elementwise_sqrt_v4f64
+  // CHECK: call <4 x double> @llvm.sqrt.v4f64(<4 x double> %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
 #endif
 
 // NODIVOPT: ![[MD_FDIV]] = !{float 2.500000e+00}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
@@ -601,6 +601,27 @@ void test_builtin_elementwise_sin(int i, float f, double d, float4 v, int3 iv, u
   // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
+void test_builtin_elementwise_sqrt(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+  struct Foo s = __builtin_elementwise_sqrt(f);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_sqrt();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_sqrt(i);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_sqrt(f, f);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_sqrt(u);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_sqrt(uv);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+}
+
 void test_builtin_elementwise_trunc(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
   struct Foo s = __builtin_elementwise_trunc(f);
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -111,6 +111,13 @@ void test_builtin_elementwise_sin() {
   static_assert(!is_const<decltype(__builtin_elementwise_sin(b))>::value);
 }
 
+void test_builtin_elementwise_sqrt() {
+  const float a = 42.0;
+  float b = 42.3;
+  static_assert(!is_const<decltype(__builtin_elementwise_sqrt(a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_sqrt(b))>::value);
+}
+
 void test_builtin_elementwise_log() {
   const float a = 42.0;
   float b = 42.3;