intel · steffenlarsen · Jan 30, 2023 · Jan 10, 2023 · Jan 26, 2023 · Jan 26, 2023
@@ -2146,6 +2146,11 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
       if (Target->hasBFloat16Type()) {
         Width = Target->getBFloat16Width();
         Align = Target->getBFloat16Align();
+      } else if ((getLangOpts().SYCLIsDevice ||
+                  (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)) &&
+                 AuxTarget->hasBFloat16Type()) {
+        Width = AuxTarget->getBFloat16Width();
+        Align = AuxTarget->getBFloat16Align();
       }
       break;
     case BuiltinType::Float16:

@@ -3050,7 +3050,11 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
     break;
   }
   case BuiltinType::BFloat16: {
-    const TargetInfo *TI = &getASTContext().getTargetInfo();
+    const TargetInfo *TI = ((getASTContext().getLangOpts().OpenMP &&
+                             getASTContext().getLangOpts().OpenMPIsDevice) ||
+                            getASTContext().getLangOpts().SYCLIsDevice)
+                               ? getASTContext().getAuxTargetInfo()
+                               : &getASTContext().getTargetInfo();
     Out << TI->getBFloat16Mangling();
     break;
   }

@@ -45,11 +45,9 @@ typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
 typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
 typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));
 
-#ifndef __SYCL_DEVICE_ONLY__
 typedef __bf16 __v16bf __attribute__((__vector_size__(32), __aligned__(32)));
 typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
 #endif
-#endif
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))

@@ -25,7 +25,6 @@
   __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"),   \
                  __min_vector_width__(256)))
 
-#ifndef __SYCL_DEVICE_ONLY__
 /// Convert scalar BF16 (16-bit) floating-point element
 /// stored at memory locations starting at location \a __A to a
 /// single-precision (32-bit) floating-point, broadcast it to packed
@@ -91,7 +90,6 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_bcstnebf16_ps(const void *__A) {
   return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A);
 }
-#endif
 
 /// Convert scalar half-precision (16-bit) floating-point element
 /// stored at memory locations starting at location \a __A to a
@@ -159,7 +157,6 @@ _mm256_bcstnesh_ps(const void *__A) {
   return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A);
 }
 
-#ifndef __SYCL_DEVICE_ONLY__
 /// Convert packed BF16 (16-bit) floating-point even-indexed elements
 /// stored at memory locations starting at location \a __A to packed
 /// single-precision (32-bit) floating-point elements, and store the results in
@@ -225,7 +222,6 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtneebf16_ps(const __m256bh *__A) {
   return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A);
 }
-#endif
 
 /// Convert packed half-precision (16-bit) floating-point even-indexed elements
 /// stored at memory locations starting at location \a __A to packed
@@ -293,7 +289,6 @@ _mm256_cvtneeph_ps(const __m256h *__A) {
   return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A);
 }
 
-#ifndef __SYCL_DEVICE_ONLY__
 /// Convert packed BF16 (16-bit) floating-point odd-indexed elements
 /// stored at memory locations starting at location \a __A to packed
 /// single-precision (32-bit) floating-point elements, and store the results in
@@ -359,7 +354,6 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtneobf16_ps(const __m256bh *__A) {
   return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A);
 }
-#endif
 
 /// Convert packed half-precision (16-bit) floating-point odd-indexed elements
 /// stored at memory locations starting at location \a __A to packed
@@ -427,7 +421,6 @@ _mm256_cvtneoph_ps(const __m256h *__A) {
   return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A);
 }
 
-#ifndef __SYCL_DEVICE_ONLY__
 /// Convert packed single-precision (32-bit) floating-point elements in \a __A
 /// to packed BF16 (16-bit) floating-point elements, and store the results in \a
 /// dst.
@@ -483,7 +476,6 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS256
 _mm256_cvtneps_avx_pbh(__m256 __A) {
   return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A);
 }
-#endif
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256

@@ -44,11 +44,9 @@ typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
 
-#ifndef __SYCL_DEVICE_ONLY__
 typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
 #endif
-#endif
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS                                                     \

@@ -229,7 +229,6 @@
 #include <avx512vlfp16intrin.h>
 #endif
 
-#ifndef __SYCL_DEVICE_ONLY__
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__AVX512BF16__)
 #include <avx512bf16intrin.h>
@@ -239,7 +238,6 @@
     (defined(__AVX512VL__) && defined(__AVX512BF16__))
 #include <avx512vlbf16intrin.h>
 #endif
-#endif
 
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__PKU__)

@@ -2073,6 +2073,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
         (Ty->isIbm128Type() && !Context.getTargetInfo().hasIbm128Type()) ||
         (Ty->isIntegerType() && Context.getTypeSize(Ty) == 128 &&
          !Context.getTargetInfo().hasInt128Type()) ||
+        (Ty->isBFloat16Type() && !Context.getTargetInfo().hasBFloat16Type() &&
+         !LangOpts.CUDAIsDevice) ||
         LongDoubleMismatched) {
       PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type);
       if (D)

@@ -434,6 +434,7 @@ static void checkSYCLType(Sema &S, QualType Ty, SourceRange Loc,
   if (Ty->isSpecificBuiltinType(BuiltinType::Int128) ||
       Ty->isSpecificBuiltinType(BuiltinType::UInt128) ||
       Ty->isSpecificBuiltinType(BuiltinType::LongDouble) ||
+      Ty->isSpecificBuiltinType(BuiltinType::BFloat16) ||
       (Ty->isSpecificBuiltinType(BuiltinType::Float128) &&
        !S.Context.getTargetInfo().hasFloat128Type())) {
     S.SYCLDiagIfDeviceCode(Loc.getBegin(), diag::err_type_unsupported)

@@ -1527,9 +1527,10 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
     break;
   case DeclSpec::TST_half:    Result = Context.HalfTy; break;
   case DeclSpec::TST_BFloat16:
-    if (!S.Context.getTargetInfo().hasBFloat16Type())
-      S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
-	<< "__bf16";
+    if (!S.Context.getTargetInfo().hasBFloat16Type() &&
+        !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice) &&
+        !S.getLangOpts().SYCLIsDevice)
+        S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__bf16";
     Result = Context.BFloat16Ty;
     break;
   case DeclSpec::TST_float:   Result = Context.FloatTy; break;

@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple spir64 -aux-triple x86_64-unknown-linux-gnu -fsycl-is-device -verify -fsyntax-only %s
+
+// Checks how __bf16 is diagnosed when compiling SYCL device code for spir64
+// with an x86_64 aux target.
+
+// Kernel entry point for the test: it only invokes the functor it is given.
+template <typename Name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc(); // expected-note {{called by 'kernel}}
+}
+
+// Called only from main, outside the kernel lambda. No verify directive is
+// attached, so the __bf16 local must not be diagnosed.
+void host_ok(void) {
+  __bf16 A;
+}
+
+int main() {
+  host_ok();
+  __bf16 var; // expected-note {{'var' defined here}}
+  kernel<class variables>([=]() {
+    // Referencing the captured __bf16 variable inside the kernel is an error.
+    (void)var; // expected-error {{'var' requires 16 bit size '__bf16' type support, but target 'spir64' does not support it}}
+    // No directive here: sizeof(__bf16) must not be diagnosed.
+    int B = sizeof(__bf16);
+  });
+
+  return 0;
+}