Skip to content

Commit 8096a6f

Browse files
authored
[SYCL][CUDA] Allow FTZ, prec-sqrt to override no-ftz, no-prec-sqrt (#7616)
If two bc files are compiled with different values for flags `nvvm-reflect-ftz` or `nvvm-reflect-prec-sqrt` then llvm-link will emit an error for the conflicting module flags. This instead allows FTZ=true to override FTZ=false, and the same with `prec-sqrt`. This was blocking ftz=true from being used in SYCL for CUDA backend as the `llvm/libdevice` library is compiled with default ftz value, meaning introducing a non default value for `nvvm-reflect-ftz` will fail at `llvm-link` time. An alternative is to introduce a clang flag that will completely omit nvvm-reflect module flags, which can then be used when compiling libdevice.
1 parent fdf8ac7 commit 8096a6f

File tree

4 files changed

+11
-12
lines changed

4 files changed

+11
-12
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -872,12 +872,11 @@ void CodeGenModule::Release() {
872872
// Indicate whether __nvvm_reflect should be configured to flush denormal
873873
// floating point values to 0. (This corresponds to its "__CUDA_FTZ"
874874
// property.)
875-
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
876-
(CodeGenOpts.FP32DenormalMode.Output !=
877-
llvm::DenormalMode::IEEE) ||
878-
(CodeGenOpts.FPDenormalMode.Output !=
879-
llvm::DenormalMode::IEEE));
880-
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-prec-sqrt",
875+
getModule().addModuleFlag(
876+
llvm::Module::Max, "nvvm-reflect-ftz",
877+
(CodeGenOpts.FP32DenormalMode.Output != llvm::DenormalMode::IEEE) ||
878+
(CodeGenOpts.FPDenormalMode.Output != llvm::DenormalMode::IEEE));
879+
getModule().addModuleFlag(llvm::Module::Max, "nvvm-reflect-prec-sqrt",
881880
getTarget().getTargetOpts().NVVMCudaPrecSqrt);
882881
}
883882

clang/test/CodeGenCUDA/flush-denormals.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ extern "C" __device__ void foo() {}
4545
// NOFTZ-NOT: "denormal-fp-math-f32"
4646

4747
// PTXFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
48-
// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
48+
// PTXFTZ:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 1}
4949

5050
// PTXNOFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
51-
// PTXNOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
51+
// PTXNOFTZ:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 0}

clang/test/CodeGenCUDA/nvvm-reflect-prec-sqrt.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@
77

88
extern "C" __device__ void foo() {}
99

10-
// CHECK-ON: !{i32 4, !"nvvm-reflect-prec-sqrt", i32 1}
11-
// CHECK-OFF: !{i32 4, !"nvvm-reflect-prec-sqrt", i32 0}
10+
// CHECK-ON: !{i32 7, !"nvvm-reflect-prec-sqrt", i32 1}
11+
// CHECK-OFF: !{i32 7, !"nvvm-reflect-prec-sqrt", i32 0}

clang/test/CodeGenSYCL/flush-denormals.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ void foo() {}
1111

1212
// FTZ32: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
1313
// PTXFTZ32:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
14-
// PTXFTZ32:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
14+
// PTXFTZ32:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 1}
1515

1616
// FTZ: attributes #0 = {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign"
1717
// PTXFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
18-
// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
18+
// PTXFTZ:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 1}

0 commit comments

Comments
 (0)