-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Clang] Automatically enable -fconvergent-functions
on GPU targets
#111076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Summary: This patch causes us to respect the `-fconvergent-functions` and `-fno-convergent-functions` options correctly. GPU targets should have this set all the time, but we now offer `-fno-convergent-functions` to opt-out if you want to test broken behavior. This munged about with a lot of the old weird logic, but I don't think it makes any real changes.
@llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) ChangesSummary: Patch is 171.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111076.diff 13 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9d183ff2d69b3c..445e681313eba7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1195,10 +1195,11 @@ def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group<clang_i_Group>,
def c : Flag<["-"], "c">, Flags<[NoXarchOption]>,
Visibility<[ClangOption, FlangOption]>, Group<Action_Group>,
HelpText<"Only run preprocess, compile, and assemble steps">;
-defm convergent_functions : BoolFOption<"convergent-functions",
- LangOpts<"ConvergentFunctions">, DefaultFalse,
- NegFlag<SetFalse, [], [ClangOption], "Assume all functions may be convergent.">,
- PosFlag<SetTrue, [], [ClangOption, CC1Option]>>;
+def fconvergent_functions : Flag<["-"], "fconvergent-functions">,
+ Visibility<[ClangOption, CC1Option]>,
+ HelpText< "Assume all functions may be convergent.">;
+def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">,
+ Visibility<[ClangOption, CC1Option]>;
// Common offloading options
let Group = offload_Group in {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b9987288d82d10..70643baba2df91 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6258,8 +6258,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_unique_internal_linkage_names);
Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names);
- Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
- options::OPT_fno_convergent_functions);
if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
@@ -6276,6 +6274,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_finstrument_functions,
options::OPT_finstrument_functions_after_inlining,
options::OPT_finstrument_function_entry_bare);
+ Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
+ options::OPT_fno_convergent_functions);
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
// for sampling, overhead of call arc collection is way too high and there's
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index a0291ccfea2458..2b5b6a8d2c5911 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3687,10 +3687,10 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts,
if (Opts.Blocks && !(Opts.OpenCL && Opts.OpenCLVersion == 200))
GenerateArg(Consumer, OPT_fblocks);
- if (Opts.ConvergentFunctions &&
- !(Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || Opts.SYCLIsDevice ||
- Opts.HLSL))
+ if (Opts.ConvergentFunctions)
GenerateArg(Consumer, OPT_fconvergent_functions);
+ else
+ GenerateArg(Consumer, OPT_fno_convergent_functions);
if (Opts.NoBuiltin && !Opts.Freestanding)
GenerateArg(Consumer, OPT_fno_builtin);
@@ -4106,9 +4106,10 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL
&& Opts.OpenCLVersion == 200);
- Opts.ConvergentFunctions = Args.hasArg(OPT_fconvergent_functions) ||
- Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
- Opts.SYCLIsDevice || Opts.HLSL;
+ Opts.ConvergentFunctions = Args.hasFlag(
+ OPT_fconvergent_functions, OPT_fno_convergent_functions,
+ Opts.OpenMPIsTargetDevice || T.isAMDGPU() || T.isNVPTX() || Opts.OpenCL ||
+ Opts.CUDAIsDevice || Opts.SYCLIsDevice || Opts.HLSL);
Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;
if (!Opts.NoBuiltin)
@@ -4164,9 +4165,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
bool IsTargetSpecified =
Opts.OpenMPIsTargetDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
- Opts.ConvergentFunctions =
- Opts.ConvergentFunctions || Opts.OpenMPIsTargetDevice;
-
if (Opts.OpenMP || Opts.OpenMPSimd) {
if (int Version = getLastArgIntValue(
Args, OPT_fopenmp_version_EQ,
diff --git a/clang/test/CodeGen/nvptx_attributes.c b/clang/test/CodeGen/nvptx_attributes.c
index dcf79ad4e0e6a5..7dbd9f1321e280 100644
--- a/clang/test/CodeGen/nvptx_attributes.c
+++ b/clang/test/CodeGen/nvptx_attributes.c
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
-// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK: Function Attrs: convergent noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@foo
// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index b967701ca1fa96..d0c87d9dfda5f7 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -106,9 +106,9 @@ const B& f(A *a) {
//.
-// CHECK: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
-// CHECK: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR3]] = { nounwind }
// CHECK: attributes #[[ATTR4]] = { noreturn }
//.
diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp
index 8a8c8a02aaef06..5378ff18b34c9d 100644
--- a/clang/test/OpenMP/target_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp
@@ -3782,7 +3782,7 @@ int bar(int n){
// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
-// CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
+// CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK9-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK9-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -4561,7 +4561,7 @@ int bar(int n){
// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
-// CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
+// CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK11-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK11-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
index 4bde6a4cfda3fb..e4570dbb1efcd7 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
@@ -9131,7 +9131,7 @@ int bar(int n){
// CHECK17-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
-// CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
+// CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK17-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -9935,7 +9935,7 @@ int bar(int n){
// CHECK19-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
-// CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
+// CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK19-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -10739,7 +10739,7 @@ int bar(int n){
// CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK21-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
-// CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
+// CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK21-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK21-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -11629,7 +11629,7 @@ int bar(int n){
// CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK23-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
-// CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
+// CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK23-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK23-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
index 00a900d441b030..78e40e54671ac3 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -2718,17 +2718,17 @@ int main() {
// CHECK13: omp.arraycpy.body:
// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]]
-// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]]
-// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]]
+// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]])
+// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]])
+// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR3:[0-9]+]]
// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]]
// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]]
// CHECK13: omp.arraycpy.done3:
-// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]]
-// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]]
-// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]]
+// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]])
+// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]])
+// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR3]]
// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
@@ -2778,14 +2778,14 @@ int main() {
// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
-// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]]
+// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR3]]
// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0
// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK13: arraydestroy.body:
// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
+// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK13: arraydestroy.done8:
@@ -2798,7 +2798,7 @@ int main() {
// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK13-NEXT: call void @_ZN2StC2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR4]]
+// CHECK13-NEXT: call void @_ZN2StC2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]])
// CHECK13-NEXT: ret void
//
//
@@ -2813,7 +2813,7 @@ int main() {
// CHECK13-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8
// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK13-NEXT: call void @_ZN1SIfEC2ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef [[T]]) #[[ATTR4]]
+// CHECK13-NEXT: call void @_ZN1SIfEC2ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef [[T]])
// CHECK13-NEXT: ret void
//
//
@@ -2823,7 +2823,7 @@ int main() {
// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK13-NEXT: call void @_ZN2StD2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR5]]
+// CHECK13-NEXT: call void @_ZN2StD2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]]
// CHECK13-NEXT: ret void
//
//
@@ -2881,17 +2881,17 @@ int main() {
// CHECK13: omp.arraycpy.body:
// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]]
-// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]]
-// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]]
+// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]])
+// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]])
+// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR3]]
// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]]
// CHECK13: omp.arraycpy.done4:
-// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]]
-// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]]
-// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]]
+// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]])
+// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]])
+// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR3]]
// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
@@ -2948,14 +2948,14 @@ int main() {
// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
-// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4...
[truncated]
|
You may legitimately know there are no convergent functions in the TU. We also have the noconvergent source attribute now for this |
Think it would be useful to put that on functions in the wrapper headers that definitely aren't convergent? E.g. getting a thread id. |
You could, but it's trivially inferable in those cases anyway |
Summary:
This patch causes us to respect the
-fconvergent-functions
and-fno-convergent-functions
options correctly. GPU targets should havethis set all the time, but we now offer
-fno-convergent-functions
toopt-out if you want to test broken behavior. This munged about with a
lot of the old weird logic, but I don't think it makes any real changes.