intel · dm-vodopyanov · Oct 20, 2023 · Oct 19, 2023 · Oct 20, 2023
@@ -1,7 +1,6 @@
 #define TM 8
 #define TN SG_SZ
 #define TK 16
-#define BF16_EPSILON 0.00781250
 
 template <typename T1, typename T2, size_t M, size_t N, size_t K>
 void matrix_multiply(big_matrix<T1, M, N> &C, big_matrix<T2, M, K> &A,

@@ -5,8 +5,14 @@
 
 using bfloat16 = sycl::ext::oneapi::bfloat16;
 
-constexpr float BF16_EPSILON = 10e-2;
-constexpr float FLOAT_EPSILON = 10e-3;
+// Most of the time, failures in floating-point comparisons (both float and
+// bfloat16) are caused by accumulated rounding error rather than by the
+// algorithm itself. If the algorithm were wrong, the gap between the computed
+// result and the reference would be much larger. To avoid flaky test results
+// while still catching algorithm errors, we relax the comparison tolerance.
+// A check along these lines should be good enough to catch algorithm errors:
+// fabs(ref[i] - val[i])/max(fabs(ref)) < 10e-2
+constexpr float FLOAT_EPSILON = 10e-2;
 
 template <typename T, size_t NUM_ROWS, size_t NUM_COLS> struct big_matrix {
 public:
@@ -103,11 +109,11 @@ bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) {
     for (int j = 0; j < cols; j++) {
       if constexpr (std::is_same_v<T1, float> || std::is_same_v<T1, bfloat16>) {
         float diff = std::fabs(src[i * cols + j] - (T1)ref[i * cols + j]);
-        if (std::is_same_v<T1, float> && diff > FLOAT_EPSILON ||
-            std::is_same_v<T1, bfloat16> && diff > BF16_EPSILON) {
+        if (diff > FLOAT_EPSILON) {
           std::cout << "Incorrect result in matrix. Ref: "
                     << (T1)ref[i * cols + j] << ", Val: " << src[i * cols + j]
-                    << ", Diff: " << diff << "\n";
+                    << ", Diff: " << diff << ", Epsilon: " << FLOAT_EPSILON
+                    << "\n";
           return false;
         }
       } else if constexpr (std::is_same_v<T1, int32_t>) {

@@ -52,8 +52,6 @@ constexpr unsigned int recordThresh = 10;
 #define KCACHE2 32
 #endif
 
-#define BF16_EPSILON 0.00781250
-
 #ifdef MANUAL_UNROLL
 template <class T, T... inds, class F>
 static constexpr void loop(std::integer_sequence<T, inds...>, F &&f) {