Skip to content

Commit 6ad41e9

Browse files
authored
[SYCL][Joint Matrix] Fix for flaky joint_matrix_all_sizes (#11608)
Resolves #11603
1 parent 27b1847 commit 6ad41e9

File tree

3 files changed, with 11 additions and 8 deletions.

sycl/test-e2e/Matrix/Legacy/joint_matrix_bfloat16_rowmajorA_rowmajorB_impl.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#define TM 8
22
#define TN SG_SZ
33
#define TK 16
4-
#define BF16_EPSILON 0.00781250
54

65
template <typename T1, typename T2, size_t M, size_t N, size_t K>
76
void matrix_multiply(big_matrix<T1, M, N> &C, big_matrix<T2, M, K> &A,

sycl/test-e2e/Matrix/common.hpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,14 @@
55

66
using bfloat16 = sycl::ext::oneapi::bfloat16;
77

8-
constexpr float BF16_EPSILON = 10e-2;
9-
constexpr float FLOAT_EPSILON = 10e-3;
8+
// Most of the time, failures related to floating-point calculations (both float
9+
// and bfloat16) are caused by accumulation errors rather than the algorithm
10+
// itself. If it is an algorithm issue, the calculated result gap from the
11+
// reference would be much bigger. To avoid flaky test results while catching
12+
// algorithm errors, we are relaxing the error tolerance (larger epsilon).
13+
// Something like this should be good enough to catch algorithm errors:
14+
// fabs(ref[i] - val[i])/max(fabs(ref)) < 10e-2
15+
constexpr float FLOAT_EPSILON = 10e-2;
1016

1117
template <typename T, size_t NUM_ROWS, size_t NUM_COLS> struct big_matrix {
1218
public:
@@ -103,11 +109,11 @@ bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) {
103109
for (int j = 0; j < cols; j++) {
104110
if constexpr (std::is_same_v<T1, float> || std::is_same_v<T1, bfloat16>) {
105111
float diff = std::fabs(src[i * cols + j] - (T1)ref[i * cols + j]);
106-
if (std::is_same_v<T1, float> && diff > FLOAT_EPSILON ||
107-
std::is_same_v<T1, bfloat16> && diff > BF16_EPSILON) {
112+
if (diff > FLOAT_EPSILON) {
108113
std::cout << "Incorrect result in matrix. Ref: "
109114
<< (T1)ref[i * cols + j] << ", Val: " << src[i * cols + j]
110-
<< ", Diff: " << diff << "\n";
115+
<< ", Diff: " << diff << ", Epsilon: " << FLOAT_EPSILON
116+
<< "\n";
111117
return false;
112118
}
113119
} else if constexpr (std::is_same_v<T1, int32_t>) {

sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@ constexpr unsigned int recordThresh = 10;
5252
#define KCACHE2 32
5353
#endif
5454

55-
#define BF16_EPSILON 0.00781250
56-
5755
#ifdef MANUAL_UNROLL
5856
template <class T, T... inds, class F>
5957
static constexpr void loop(std::integer_sequence<T, inds...>, F &&f) {

0 commit comments

Comments
 (0)