Skip to content

[SYCL] Disable _bf16 instrinsics for SYCL #7338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@ typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));

#ifndef __SYCL_DEVICE_ONLY__
typedef __bf16 __v16bf __attribute__((__vector_size__(32), __aligned__(32)));
typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
#endif
#endif

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Headers/avxneconvertintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
__attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \
__min_vector_width__(256)))

#ifndef __SYCL_DEVICE_ONLY__
/// Convert scalar BF16 (16-bit) floating-point element
/// stored at memory locations starting at location \a __A to a
/// single-precision (32-bit) floating-point, broadcast it to packed
Expand Down Expand Up @@ -90,6 +91,7 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_bcstnebf16_ps(const void *__A) {
return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A);
}
#endif

/// Convert scalar half-precision (16-bit) floating-point element
/// stored at memory locations starting at location \a __A to a
Expand Down Expand Up @@ -157,6 +159,7 @@ _mm256_bcstnesh_ps(const void *__A) {
return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A);
}

#ifndef __SYCL_DEVICE_ONLY__
/// Convert packed BF16 (16-bit) floating-point even-indexed elements
/// stored at memory locations starting at location \a __A to packed
/// single-precision (32-bit) floating-point elements, and store the results in
Expand Down Expand Up @@ -222,6 +225,7 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtneebf16_ps(const __m256bh *__A) {
return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A);
}
#endif

/// Convert packed half-precision (16-bit) floating-point even-indexed elements
/// stored at memory locations starting at location \a __A to packed
Expand Down Expand Up @@ -289,6 +293,7 @@ _mm256_cvtneeph_ps(const __m256h *__A) {
return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A);
}

#ifndef __SYCL_DEVICE_ONLY__
/// Convert packed BF16 (16-bit) floating-point odd-indexed elements
/// stored at memory locations starting at location \a __A to packed
/// single-precision (32-bit) floating-point elements, and store the results in
Expand Down Expand Up @@ -354,6 +359,7 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtneobf16_ps(const __m256bh *__A) {
return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A);
}
#endif

/// Convert packed half-precision (16-bit) floating-point odd-indexed elements
/// stored at memory locations starting at location \a __A to packed
Expand Down Expand Up @@ -421,6 +427,7 @@ _mm256_cvtneoph_ps(const __m256h *__A) {
return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A);
}

#ifndef __SYCL_DEVICE_ONLY__
/// Convert packed single-precision (32-bit) floating-point elements in \a __A
/// to packed BF16 (16-bit) floating-point elements, and store the results in \a
/// dst.
Expand Down Expand Up @@ -476,6 +483,7 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_cvtneps_avx_pbh(__m256 __A) {
return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A);
}
#endif

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));

#ifndef __SYCL_DEVICE_ONLY__
typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#endif
#endif

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Headers/immintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@
#include <avx512vlfp16intrin.h>
#endif

#ifndef __SYCL_DEVICE_ONLY__
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
Expand All @@ -238,6 +239,7 @@
(defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__PKU__)
Expand Down
17 changes: 3 additions & 14 deletions clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1527,13 +1527,9 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
break;
case DeclSpec::TST_half: Result = Context.HalfTy; break;
case DeclSpec::TST_BFloat16:
// Disable errors for SYCL and OpenMP device since definition of __bf16 is
// being moved to a shared header and it causes new errors emitted when
// host code is compiled with device compiler for SPIR target.
// FIXME: device code specific diagnostic is probably needed.
if (!S.Context.getTargetInfo().hasBFloat16Type() &&
!S.getLangOpts().SYCLIsDevice && !S.getLangOpts().OpenMPIsDevice)
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__bf16";
if (!S.Context.getTargetInfo().hasBFloat16Type())
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__bf16";
Result = Context.BFloat16Ty;
break;
case DeclSpec::TST_float: Result = Context.FloatTy; break;
Expand Down Expand Up @@ -2730,15 +2726,8 @@ QualType Sema::BuildVectorType(QualType CurType, Expr *SizeExpr,
}

if (!TypeSize || VectorSizeBits % TypeSize) {
// Disable errors for SYCL and OpenMP device since definition of __bf16 is
// being moved to a shared header and it causes new errors emitted when
// host code is compiled with device compiler for SPIR target.
// FIXME: device code specific diagnostic is probably needed.
if (!(!TypeSize &&
(getLangOpts().OpenMPIsDevice || getLangOpts().SYCLIsDevice))) {
Diag(AttrLoc, diag::err_attribute_invalid_size)
<< SizeExpr->getSourceRange();
}
return QualType();
}

Expand Down