Skip to content

Commit b5023ea

Browse files
authored
[SYCL][ext] Add always_inline attribute to round_to_tf32 (#6531)
This function should have been defined with the always_inline attribute to avoid multiple symbol definitions in multi-object compilations.
1 parent e59098a commit b5023ea

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// ===--------------------------------------------------------------------=== //
88

99
#pragma once
10+
#include "sycl/detail/defines_elementary.hpp"
1011
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
1112

1213
namespace sycl {
@@ -764,7 +765,7 @@ joint_matrix_mad(
764765

765766
// This function rounds the bottom 13 bits up or down, and then zeros out the
766767
// bottom bits
767-
float round_to_tf32(float a) {
768+
inline __SYCL_ALWAYS_INLINE float round_to_tf32(float a) {
768769
#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
769770
int32_t tmp_int = __nvvm_f2tf32_rna(a);
770771
return __nvvm_bitcast_i2f(tmp_int);

0 commit comments

Comments
 (0)