[SYCL][CUDA] Allow FTZ, prec-sqrt to override no-ftz, no-prec-sqrt (#7616)

hdelan · web-flow · commit 8096a6fb7f5a · 2023-01-06T12:40:00.000-08:00
If two bitcode (`.bc`) files are compiled with different values for the module
flags `nvvm-reflect-ftz` or `nvvm-reflect-prec-sqrt`, then llvm-link will emit
an error for the conflicting module flags. This change instead emits both flags
with `Max` merge behavior rather than `Override`, which allows FTZ=true to
override FTZ=false, and likewise for `prec-sqrt`.

This was blocking ftz=true from being used in SYCL for the CUDA backend, as
the `llvm/libdevice` library is compiled with the default ftz value, meaning
that introducing a non-default value for `nvvm-reflect-ftz` would fail at
`llvm-link` time.

An alternative would be to introduce a clang flag that completely omits the
nvvm-reflect module flags, which could then be used when compiling
libdevice.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -872,12 +872,11 @@ void CodeGenModule::Release() {
     // Indicate whether __nvvm_reflect should be configured to flush denormal
     // floating point values to 0.  (This corresponds to its "__CUDA_FTZ"
     // property.)
-    getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
-                              (CodeGenOpts.FP32DenormalMode.Output !=
-                                  llvm::DenormalMode::IEEE) ||
-                              (CodeGenOpts.FPDenormalMode.Output !=
-                                  llvm::DenormalMode::IEEE));
-    getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-prec-sqrt",
+    getModule().addModuleFlag(
+        llvm::Module::Max, "nvvm-reflect-ftz",
+        (CodeGenOpts.FP32DenormalMode.Output != llvm::DenormalMode::IEEE) ||
+            (CodeGenOpts.FPDenormalMode.Output != llvm::DenormalMode::IEEE));
+    getModule().addModuleFlag(llvm::Module::Max, "nvvm-reflect-prec-sqrt",
                               getTarget().getTargetOpts().NVVMCudaPrecSqrt);
   }
 
diff --git a/clang/test/CodeGenCUDA/flush-denormals.cu b/clang/test/CodeGenCUDA/flush-denormals.cu
@@ -45,7 +45,7 @@ extern "C" __device__ void foo() {}
 // NOFTZ-NOT: "denormal-fp-math-f32"
 
 // PTXFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
-// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
+// PTXFTZ:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 1}
 
 // PTXNOFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
-// PTXNOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
+// PTXNOFTZ:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 0}
diff --git a/clang/test/CodeGenCUDA/nvvm-reflect-prec-sqrt.cu b/clang/test/CodeGenCUDA/nvvm-reflect-prec-sqrt.cu
@@ -7,5 +7,5 @@
 
 extern "C" __device__ void foo() {}
 
-// CHECK-ON: !{i32 4, !"nvvm-reflect-prec-sqrt", i32 1}
-// CHECK-OFF: !{i32 4, !"nvvm-reflect-prec-sqrt", i32 0}
+// CHECK-ON: !{i32 7, !"nvvm-reflect-prec-sqrt", i32 1}
+// CHECK-OFF: !{i32 7, !"nvvm-reflect-prec-sqrt", i32 0}
diff --git a/clang/test/CodeGenSYCL/flush-denormals.cpp b/clang/test/CodeGenSYCL/flush-denormals.cpp
@@ -11,8 +11,8 @@ void foo() {}
 
 // FTZ32: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
 // PTXFTZ32:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
-// PTXFTZ32:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
+// PTXFTZ32:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 1}
 
 // FTZ: attributes #0 = {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign"
 // PTXFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
-// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
+// PTXFTZ:[[MODFLAG]] = !{i32 7, !"nvvm-reflect-ftz", i32 1}