Skip to content

[SYCL][OpenMP] Correctly handle __bf16 intrinsics using deferred diagnostics #8116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2146,6 +2146,11 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
if (Target->hasBFloat16Type()) {
Width = Target->getBFloat16Width();
Align = Target->getBFloat16Align();
} else if ((getLangOpts().SYCLIsDevice ||
(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)) &&
AuxTarget->hasBFloat16Type()) {
Width = AuxTarget->getBFloat16Width();
Align = AuxTarget->getBFloat16Align();
}
break;
case BuiltinType::Float16:
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/AST/ItaniumMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3050,7 +3050,11 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
break;
}
case BuiltinType::BFloat16: {
const TargetInfo *TI = &getASTContext().getTargetInfo();
const TargetInfo *TI = ((getASTContext().getLangOpts().OpenMP &&
getASTContext().getLangOpts().OpenMPIsDevice) ||
getASTContext().getLangOpts().SYCLIsDevice)
? getASTContext().getAuxTargetInfo()
: &getASTContext().getTargetInfo();
Out << TI->getBFloat16Mangling();
break;
}
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,9 @@ typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));

#ifndef __SYCL_DEVICE_ONLY__
typedef __bf16 __v16bf __attribute__((__vector_size__(32), __aligned__(32)));
typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
#endif
#endif

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
Expand Down
8 changes: 0 additions & 8 deletions clang/lib/Headers/avxneconvertintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
__attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \
__min_vector_width__(256)))

#ifndef __SYCL_DEVICE_ONLY__
/// Convert scalar BF16 (16-bit) floating-point element
/// stored at memory locations starting at location \a __A to a
/// single-precision (32-bit) floating-point, broadcast it to packed
Expand Down Expand Up @@ -91,7 +90,6 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_bcstnebf16_ps(const void *__A) {
return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A);
}
#endif

/// Convert scalar half-precision (16-bit) floating-point element
/// stored at memory locations starting at location \a __A to a
Expand Down Expand Up @@ -159,7 +157,6 @@ _mm256_bcstnesh_ps(const void *__A) {
return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A);
}

#ifndef __SYCL_DEVICE_ONLY__
/// Convert packed BF16 (16-bit) floating-point even-indexed elements
/// stored at memory locations starting at location \a __A to packed
/// single-precision (32-bit) floating-point elements, and store the results in
Expand Down Expand Up @@ -225,7 +222,6 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtneebf16_ps(const __m256bh *__A) {
return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A);
}
#endif

/// Convert packed half-precision (16-bit) floating-point even-indexed elements
/// stored at memory locations starting at location \a __A to packed
Expand Down Expand Up @@ -293,7 +289,6 @@ _mm256_cvtneeph_ps(const __m256h *__A) {
return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A);
}

#ifndef __SYCL_DEVICE_ONLY__
/// Convert packed BF16 (16-bit) floating-point odd-indexed elements
/// stored at memory locations starting at location \a __A to packed
/// single-precision (32-bit) floating-point elements, and store the results in
Expand Down Expand Up @@ -359,7 +354,6 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtneobf16_ps(const __m256bh *__A) {
return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A);
}
#endif

/// Convert packed half-precision (16-bit) floating-point odd-indexed elements
/// stored at memory locations starting at location \a __A to packed
Expand Down Expand Up @@ -427,7 +421,6 @@ _mm256_cvtneoph_ps(const __m256h *__A) {
return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A);
}

#ifndef __SYCL_DEVICE_ONLY__
/// Convert packed single-precision (32-bit) floating-point elements in \a __A
/// to packed BF16 (16-bit) floating-point elements, and store the results in \a
/// dst.
Expand Down Expand Up @@ -483,7 +476,6 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_cvtneps_avx_pbh(__m256 __A) {
return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A);
}
#endif

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,9 @@ typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));

#ifndef __SYCL_DEVICE_ONLY__
typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#endif
#endif

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/Headers/immintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,6 @@
#include <avx512vlfp16intrin.h>
#endif

#ifndef __SYCL_DEVICE_ONLY__
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
Expand All @@ -239,7 +238,6 @@
(defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__PKU__)
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/Sema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2073,6 +2073,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
(Ty->isIbm128Type() && !Context.getTargetInfo().hasIbm128Type()) ||
(Ty->isIntegerType() && Context.getTypeSize(Ty) == 128 &&
!Context.getTargetInfo().hasInt128Type()) ||
(Ty->isBFloat16Type() && !Context.getTargetInfo().hasBFloat16Type() &&
!LangOpts.CUDAIsDevice) ||
LongDoubleMismatched) {
PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type);
if (D)
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ static void checkSYCLType(Sema &S, QualType Ty, SourceRange Loc,
if (Ty->isSpecificBuiltinType(BuiltinType::Int128) ||
Ty->isSpecificBuiltinType(BuiltinType::UInt128) ||
Ty->isSpecificBuiltinType(BuiltinType::LongDouble) ||
Ty->isSpecificBuiltinType(BuiltinType::BFloat16) ||
(Ty->isSpecificBuiltinType(BuiltinType::Float128) &&
!S.Context.getTargetInfo().hasFloat128Type())) {
S.SYCLDiagIfDeviceCode(Loc.getBegin(), diag::err_type_unsupported)
Expand Down
7 changes: 4 additions & 3 deletions clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1527,9 +1527,10 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
break;
case DeclSpec::TST_half: Result = Context.HalfTy; break;
case DeclSpec::TST_BFloat16:
if (!S.Context.getTargetInfo().hasBFloat16Type())
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__bf16";
if (!S.Context.getTargetInfo().hasBFloat16Type() &&
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice) &&
!S.getLangOpts().SYCLIsDevice)
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__bf16";
Result = Context.BFloat16Ty;
break;
case DeclSpec::TST_float: Result = Context.FloatTy; break;
Expand Down
22 changes: 22 additions & 0 deletions clang/test/SemaSYCL/bf16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: %clang_cc1 -triple spir64 -aux-triple x86_64-unknown-linux-gnu -fsycl-is-device -verify -fsyntax-only %s

template <typename Name, typename Func>
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
kernelFunc(); // expected-note {{called by 'kernel}}
}

void host_ok(void) {
__bf16 A;
}

int main()
{ host_ok();
__bf16 var; // expected-note {{'var' defined here}}
kernel<class variables>([=]() {
(void)var; // expected-error {{'var' requires 16 bit size '__bf16' type support, but target 'spir64' does not support it}}
int B = sizeof(__bf16);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Sorry, this is late. My notifications weren't working.)

Maybe a nit. Why is this declaration of B needed? Or is this showing that it does not get a diagnostic?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I added it to show we don't get a diagnostic when used in unevaluated context

});

return 0;
}