We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
round_to_tf32
1 parent e59098a commit b5023ea · Copy full SHA for b5023ea
sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp
@@ -7,6 +7,7 @@
7
// ===--------------------------------------------------------------------=== //
8
9
#pragma once
10
+#include "sycl/detail/defines_elementary.hpp"
11
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
12
13
namespace sycl {
@@ -764,7 +765,7 @@ joint_matrix_mad(
764
765
766
// This function rounds the bottom 13 bits up or down, and then zeros out the
767
// bottom bits
-float round_to_tf32(float a) {
768
+inline __SYCL_ALWAYS_INLINE float round_to_tf32(float a) {
769
#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
770
int32_t tmp_int = __nvvm_f2tf32_rna(a);
771
return __nvvm_bitcast_i2f(tmp_int);
0 commit comments