[OpenMP][AMDGCN] Initial math headers support

pdhaliwal-amd · pdhaliwal-amd · commit 12da97ea10a9 · 2021-07-30T14:52:41.000Z
With this patch, OpenMP on AMDGCN will use the math functions provided by ROCm ocml library. Linking device code to the ocml will be done in the next patch. Reviewed By: JonChesterfield, jdoerfert, scchan Differential Revision: https://reviews.llvm.org/D104904
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1256,7 +1256,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
   // If we are offloading to a target via OpenMP we need to include the
   // openmp_wrappers folder which contains alternative system headers.
   if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
-      getToolChain().getTriple().isNVPTX()){
+      (getToolChain().getTriple().isNVPTX() ||
+       getToolChain().getTriple().isAMDGCN())) {
     if (!Args.hasArg(options::OPT_nobuiltininc)) {
       // Add openmp_wrappers/* to our system include path.  This lets us wrap
       // standard library headers.
diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
@@ -9,7 +9,7 @@
 #ifndef __CLANG_HIP_MATH_H__
 #define __CLANG_HIP_MATH_H__
 
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
@@ -19,18 +19,27 @@
 #endif
 #include <limits.h>
 #include <stdint.h>
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
+
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
+#else
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#endif
 
 // A few functions return bool type starting only in C++11.
 #pragma push_macro("__RETURN_TYPE")
+#ifdef __OPENMP_AMDGCN__
+#define __RETURN_TYPE int
+#else
 #if defined(__cplusplus)
 #define __RETURN_TYPE bool
 #else
 #define __RETURN_TYPE int
 #endif
+#endif // __OPENMP_AMDGCN__
 
 #if defined (__cplusplus) && __cplusplus < 201103L
 // emulate static_assert on type sizes
@@ -1262,15 +1271,15 @@ float min(float __x, float __y) { return fminf(__x, __y); }
 __DEVICE__
 double min(double __x, double __y) { return fmin(__x, __y); }
 
-#if !defined(__HIPCC_RTC__)
+#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
 __host__ inline static int min(int __arg1, int __arg2) {
   return std::min(__arg1, __arg2);
 }
 
 __host__ inline static int max(int __arg1, int __arg2) {
   return std::max(__arg1, __arg2);
 }
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
 #endif
 
 #pragma pop_macro("__DEVICE__")
diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -14,13 +14,13 @@
 #error "This file is for OpenMP compilation only."
 #endif
 
-#pragma omp begin declare variant match(                                       \
-    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#pragma omp begin declare variant match(                                       \
+    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+
 #define __CUDA__
 #define __OPENMP_NVPTX__
 
@@ -33,12 +33,32 @@ extern "C" {
 #undef __OPENMP_NVPTX__
 #undef __CUDA__
 
-#ifdef __cplusplus
-} // extern "C"
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+// Import types which will be used by __clang_hip_libdevice_declares.h
+#ifndef __cplusplus
+#include <stdbool.h>
+#include <stdint.h>
 #endif
 
+#define __OPENMP_AMDGCN__
+#pragma push_macro("__device__")
+#define __device__
+
+/// Include declarations for libdevice functions.
+#include <__clang_hip_libdevice_declares.h>
+
+#pragma pop_macro("__device__")
+#undef __OPENMP_AMDGCN__
+
 #pragma omp end declare variant
 
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
 // Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
 // need to `include <new>` in C++ mode.
 #ifdef __cplusplus
diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath
@@ -75,4 +75,58 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
 
 #pragma omp end declare variant
 
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#pragma push_macro("__constant__")
+#define __constant__ __attribute__((constant))
+#define __OPENMP_AMDGCN__
+
+#include <__clang_hip_cmath.h>
+
+#pragma pop_macro("__constant__")
+#undef __OPENMP_AMDGCN__
+
+// Define overloads otherwise which are absent
+#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
+
+__DEVICE__ float acos(float __x) { return ::acosf(__x); }
+__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
+__DEVICE__ float asin(float __x) { return ::asinf(__x); }
+__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
+__DEVICE__ float atan(float __x) { return ::atanf(__x); }
+__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
+__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
+__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
+__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
+__DEVICE__ float erf(float __x) { return ::erff(__x); }
+__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
+__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
+__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
+__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
+__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
+__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
+__DEVICE__ float ldexp(float __arg, int __exp) {
+  return ::ldexpf(__arg, __exp);
+}
+__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
+__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
+__DEVICE__ float logb(float __x) { return ::logbf(__x); }
+__DEVICE__ float nextafter(float __x, float __y) {
+  return ::nextafterf(__x, __y);
+}
+__DEVICE__ float remainder(float __x, float __y) {
+  return ::remainderf(__x, __y);
+}
+__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
+__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
+__DEVICE__ float tan(float __x) { return ::tanf(__x); }
+__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
+__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
+
+#undef __DEVICE__
+
+#pragma omp end declare variant
+#endif // __AMDGCN__
+
 #endif
diff --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h
@@ -48,4 +48,12 @@
 
 #pragma omp end declare variant
 
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#define __OPENMP_AMDGCN__
+#include <__clang_hip_math.h>
+#undef __OPENMP_AMDGCN__
+
+#pragma omp end declare variant
+
 #endif
diff --git a/clang/test/Headers/Inputs/include/algorithm b/clang/test/Headers/Inputs/include/algorithm
@@ -0,0 +1,6 @@
+#pragma once
+
+namespace std {
+ template<class T> constexpr const T& min(const T& a, const T& b);
+ template<class T> constexpr const T& max(const T& a, const T& b);
+}
diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib
@@ -27,3 +27,4 @@ float abs(float __x) { return fabs(__x); }
 double abs(double __x) { return fabs(__x); }
 
 }
+
diff --git a/clang/test/Headers/Inputs/include/utility b/clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+
diff --git a/clang/test/Headers/amdgcn_openmp_device_math.c b/clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+    // CHECK: call double @__ocml_sin_f64
+    double l1 = sin(x);
+    // CHECK: call double @__ocml_cos_f64
+    double l2 = cos(x);
+    // CHECK: call double @__ocml_fabs_f64
+    double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+    // CHECK-C: call double @__ocml_sin_f64
+    // CHECK-CPP: call float @__ocml_sin_f32
+    float l1 = sin(x);
+    // CHECK-C: call double @__ocml_cos_f64
+    // CHECK-CPP: call float @__ocml_cos_f32
+    float l2 = cos(x);
+    // CHECK-C: call double @__ocml_fabs_f64
+    // CHECK-CPP: call float @__ocml_fabs_f32
+    float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+    // CHECK: call float @__ocml_sin_f32
+    float l1 = sinf(x);
+    // CHECK: call float @__ocml_cos_f32
+    float l2 = cosf(x);
+    // CHECK: call float @__ocml_fabs_f32
+    float l3 = fabsf(x);
+  }
+}
diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }

Original file line number	Diff line number	Diff line change
`@@ -27,3 +27,4 @@ float abs(float __x) { return fabs(__x); }`
`27`	`27`	`double abs(double __x) { return fabs(__x); }`
`28`	`28`
`29`	`29`	`}`
	`30`	`+`