[SYCL][E2E][Joint Matrix] Added -ffp-model=precise to some tests (#12429)

YuriPlyakhin · web-flow · commit aadd1e733de7 · 2024-01-18T09:51:25.000-08:00
Added -ffp-model=precise to the joint_matrix_bf16_fill_k_cache tests, as they
do floating-point math and can produce a NaN result, which indicates an
error. Some compiler versions may have different fp-model defaults, so
this option makes the tests independent of compiler defaults.
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
@@ -8,9 +8,11 @@
 // REQUIRES: matrix
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -o %t.out
+// RUN: %{build} -o %t.out -ffp-model=precise
 // RUN: %{run} %t.out
 
+// -ffp-model=precise is added to not depend on compiler defaults.
+
 #include "../common.hpp"
 #include <cstddef>
 
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp
@@ -8,9 +8,11 @@
 // REQUIRES: matrix, gpu
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -o %t.out -DINIT_LIST
+// RUN: %{build} -o %t.out -DINIT_LIST -ffp-model=precise
 // RUN: %{run} %t.out
 
+// -ffp-model=precise is added to not depend on compiler defaults.
+
 #include "../common.hpp"
 #include <cstddef>
 
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp
@@ -8,14 +8,15 @@
 // REQUIRES: matrix
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -mllvm -inline-threshold=5000 -o %t_gpu.out -DMANUAL_UNROLL
+// RUN: %{build} -mllvm -inline-threshold=5000 -ffp-model=precise -o %t_gpu.out -DMANUAL_UNROLL
 // RUN: %if gpu %{ %{run} %t_gpu.out %}
 
-// RUN: %{build} -mllvm -inline-threshold=5000 -o %t_cpu.out -DMANUAL_UNROLL -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
+// RUN: %{build} -mllvm -inline-threshold=5000 -ffp-model=precise -o %t_cpu.out -DMANUAL_UNROLL -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
 // RUN: %if cpu %{ %{run} %t_cpu.out %}
 
 // -mllvm -inline-threshold added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "../common.hpp"
 #include <cstddef>
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
@@ -8,11 +8,12 @@
 // REQUIRES: matrix, gpu
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -mllvm -inline-threshold=5000 -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL
+// RUN: %{build} -mllvm -inline-threshold=5000 -ffp-model=precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL
 // RUN: %{run} %t_gpu.out
 
 // -mllvm -inline-threshold added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "../common.hpp"
 #include <cstddef>
diff --git a/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix-xmx8
 
-// RUN: %{build} -o %t.out
+// RUN: %{build} -o %t.out -ffp-model=precise
 // RUN: %{run} %t.out
 
+// -ffp-model=precise is added to not depend on compiler defaults.
+
 #include "../common.hpp"
 #include <cstddef>
 
diff --git a/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache_init.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix-xmx8
 
-// RUN: %{build} -o %t.out -DINIT_LIST
+// RUN: %{build} -o %t.out -DINIT_LIST -ffp-model=precise
 // RUN: %{run} %t.out
 
+// -ffp-model=precise is added to not depend on compiler defaults.
+
 #include "../common.hpp"
 #include <cstddef>
 
diff --git a/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache_unroll.cpp b/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache_unroll.cpp
@@ -7,11 +7,12 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix-xmx8
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -o %t.out -DMANUAL_UNROLL
+// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t.out -DMANUAL_UNROLL
 // RUN: %{run} %t.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "../common.hpp"
 #include <cstddef>
diff --git a/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/XMX8/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
@@ -7,11 +7,12 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix-xmx8
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -o %t.out -DINIT_LIST -DMANUAL_UNROLL
+// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t.out -DINIT_LIST -DMANUAL_UNROLL
 // RUN: %{run} %t.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "../common.hpp"
 #include <cstddef>
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix
 
-// RUN: %{build} -o %t.out
+// RUN: %{build} -o %t.out -ffp-model=precise
 // RUN: %{run} %t.out
 
+// -ffp-model=precise is added to not depend on compiler defaults.
+
 #include "common.hpp"
 #include <cstddef>
 
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix, gpu
 
-// RUN: %{build} -o %t.out -DINIT_LIST
+// RUN: %{build} -o %t.out -DINIT_LIST -ffp-model=precise
 // RUN: %{run} %t.out
 
+// -ffp-model=precise is added to not depend on compiler defaults.
+
 #include "common.hpp"
 #include <cstddef>
 #define SG_SZ 16
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp
@@ -7,14 +7,15 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -o %t_gpu.out -DMANUAL_UNROLL
+// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t_gpu.out -DMANUAL_UNROLL
 // RUN: %if gpu %{ %{run} %t_gpu.out %}
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -o %t_cpu.out -DMANUAL_UNROLL -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
+// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t_cpu.out -DMANUAL_UNROLL -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
 // RUN: %if cpu %{ %{run} %t_cpu.out %}
 
 // -mllvm -inline-threshold=2000 added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "common.hpp"
 #include <cstddef>
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
@@ -7,11 +7,12 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: matrix, gpu
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL
+// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL
 // RUN: %{run} %t_gpu.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "common.hpp"
 #include <cstddef>