Skip to content

Commit bfbf8ab

Browse files
[SYCL][Driver] Force precise division rounding for precise model (#12107)
This commit makes the driver pass `-cl-fp32-correctly-rounded-divide-sqrt` as backend compiler options for SPIR-V targets when the `-fpp-model=precise` option is used. Signed-off-by: Larsen, Steffen <[email protected]>
1 parent d2874e6 commit bfbf8ab

File tree

4 files changed

+61
-0
lines changed

4 files changed

+61
-0
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,6 +1246,11 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
12461246
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
12471247
if (A->getOption().matches(options::OPT_O0))
12481248
BeArgs.push_back("-cl-opt-disable");
1249+
// In precise floating-point mode we pass the OpenCL flag forcing division to
1250+
// be correctly rounded.
1251+
if (Arg *A = Args.getLastArg(options::OPT_ffp_model_EQ))
1252+
if (StringRef{A->getValue()}.equals("precise"))
1253+
BeArgs.push_back("-cl-fp32-correctly-rounded-divide-sqrt");
12491254
StringRef RegAllocModeOptName = "-ftarget-register-alloc-mode=";
12501255
if (Arg *A = Args.getLastArg(options::OPT_ftarget_register_alloc_mode_EQ)) {
12511256
StringRef RegAllocModeVal = A->getValue(0);

clang/test/Driver/sycl-offload-aot.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,18 @@
220220
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-OPTS-GEN %s
221221
// CHK-TOOLS-IMPLIED-OPTS-GEN: ocloc{{.*}} "-options" "-g -cl-opt-disable" "-DFOO1" "-DFOO2"
222222

223+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown -ffp-model=precise -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
224+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING-FPGA %s
225+
// CHK-TOOLS-IMPLIED-ROUNDING-FPGA: opencl-aot{{.*}} "--bo=-cl-fp32-correctly-rounded-divide-sqrt" "-DFOO1" "-DFOO2"
226+
227+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown -ffp-model=precise -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
228+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING-CPU %s
229+
// CHK-TOOLS-IMPLIED-ROUNDING-CPU: opencl-aot{{.*}} "--bo=-cl-fp32-correctly-rounded-divide-sqrt" "-DFOO1" "-DFOO2"
230+
231+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_gen-unknown-unknown -ffp-model=precise -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
232+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING-GEN %s
233+
// CHK-TOOLS-IMPLIED-ROUNDING-GEN: ocloc{{.*}} "-options" "-cl-fp32-correctly-rounded-divide-sqrt" "-DFOO1" "-DFOO2"
234+
223235
/// Check -Xsycl-target-linker option passing
224236
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -Xsycl-target-linker "-DFOO1 -DFOO2" %s 2>&1 \
225237
// RUN: | FileCheck -check-prefix=CHK-TOOLS-FPGA-OPTS2 %s

clang/test/Driver/sycl-offload.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,11 @@
424424
// CHK-TOOLS-IMPLIED-OPTS-O0-NOT: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-cl-opt-disable"
425425
// CHK-TOOLS-IMPLIED-OPTS-O2-NOT: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-cl-opt-disable"
426426

427+
/// Check for implied options (-ffp-model=precise)
428+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64 -ffp-model=precise %s 2>&1 \
429+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING %s
430+
// CHK-TOOLS-IMPLIED-ROUNDING: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-cl-fp32-correctly-rounded-divide-sqrt
431+
427432
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown -Xsycl-target-linker "-DFOO1 -DFOO2" %s 2>&1 \
428433
// RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS2 %s
429434
// CHK-TOOLS-OPTS2: clang-offload-wrapper{{.*}} "-link-opts=-DFOO1 -DFOO2"
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %{build} -ffp-model=precise -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
// Tests that -ffp-model=precise causes floating point division to be the same
5+
// on device and host.
6+
7+
#include <sycl.hpp>
8+
9+
constexpr size_t NumElems = 1024;
10+
11+
int main() {
12+
sycl::queue Q;
13+
float *InData = sycl::malloc_shared<float>(NumElems, Q);
14+
float *OutData = sycl::malloc_shared<float>(NumElems, Q);
15+
16+
for (size_t I = 0; I < NumElems; ++I) {
17+
InData[I] = float(I) + 1.0f;
18+
OutData[I] = 0.0f;
19+
}
20+
21+
Q.parallel_for(sycl::range<1>(NumElems), [=](sycl::id<1> Idx) {
22+
OutData[Idx] = InData[Idx] / InData[NumElems - Idx - 1];
23+
}).wait_and_throw();
24+
25+
size_t NumFails = 0;
26+
for (size_t I = 0; I < NumElems; ++I) {
27+
float Expected = InData[I] / InData[NumElems - I - 1];
28+
if (OutData[I] != Expected) {
29+
std::cout << "Unexpected result for element " << I << ": " << OutData[I]
30+
<< " != " << Expected << std::endl;
31+
++NumFails;
32+
}
33+
}
34+
35+
sycl::free(InData, Q);
36+
sycl::free(OutData, Q);
37+
38+
return NumFails;
39+
}

0 commit comments

Comments
 (0)