Skip to content

Commit d8f2251

Browse files
authored
[Clang] Automatically enable -fconvergent-functions on GPU targets (#111076)
Summary: This patch makes Clang respect the `-fconvergent-functions` and `-fno-convergent-functions` options correctly. GPU targets should have convergent functions enabled by default at all times, but `-fno-convergent-functions` is now available as an explicit opt-out for anyone who wants to test the resulting (broken) behavior. The patch reworks much of the old, convoluted logic around this option, but I don't think it makes any other functional changes.
1 parent 8d661fd commit d8f2251

13 files changed

+290
-289
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1195,10 +1195,11 @@ def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group<clang_i_Group>,
11951195
def c : Flag<["-"], "c">, Flags<[NoXarchOption]>,
11961196
Visibility<[ClangOption, FlangOption]>, Group<Action_Group>,
11971197
HelpText<"Only run preprocess, compile, and assemble steps">;
1198-
defm convergent_functions : BoolFOption<"convergent-functions",
1199-
LangOpts<"ConvergentFunctions">, DefaultFalse,
1200-
NegFlag<SetFalse, [], [ClangOption], "Assume all functions may be convergent.">,
1201-
PosFlag<SetTrue, [], [ClangOption, CC1Option]>>;
1198+
def fconvergent_functions : Flag<["-"], "fconvergent-functions">,
1199+
Visibility<[ClangOption, CC1Option]>,
1200+
HelpText< "Assume all functions may be convergent.">;
1201+
def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">,
1202+
Visibility<[ClangOption, CC1Option]>;
12021203

12031204
// Common offloading options
12041205
let Group = offload_Group in {

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6280,8 +6280,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
62806280
options::OPT_fno_unique_internal_linkage_names);
62816281
Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
62826282
options::OPT_fno_unique_basic_block_section_names);
6283-
Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
6284-
options::OPT_fno_convergent_functions);
62856283

62866284
if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
62876285
options::OPT_fno_split_machine_functions)) {
@@ -6298,6 +6296,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
62986296
Args.AddLastArg(CmdArgs, options::OPT_finstrument_functions,
62996297
options::OPT_finstrument_functions_after_inlining,
63006298
options::OPT_finstrument_function_entry_bare);
6299+
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
6300+
options::OPT_fno_convergent_functions);
63016301

63026302
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
63036303
// for sampling, overhead of call arc collection is way too high and there's

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3687,10 +3687,10 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts,
36873687
if (Opts.Blocks && !(Opts.OpenCL && Opts.OpenCLVersion == 200))
36883688
GenerateArg(Consumer, OPT_fblocks);
36893689

3690-
if (Opts.ConvergentFunctions &&
3691-
!(Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || Opts.SYCLIsDevice ||
3692-
Opts.HLSL))
3690+
if (Opts.ConvergentFunctions)
36933691
GenerateArg(Consumer, OPT_fconvergent_functions);
3692+
else
3693+
GenerateArg(Consumer, OPT_fno_convergent_functions);
36943694

36953695
if (Opts.NoBuiltin && !Opts.Freestanding)
36963696
GenerateArg(Consumer, OPT_fno_builtin);
@@ -4106,9 +4106,12 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
41064106
Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL
41074107
&& Opts.OpenCLVersion == 200);
41084108

4109-
Opts.ConvergentFunctions = Args.hasArg(OPT_fconvergent_functions) ||
4110-
Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
4111-
Opts.SYCLIsDevice || Opts.HLSL;
4109+
bool HasConvergentOperations = Opts.OpenMPIsTargetDevice || Opts.OpenCL ||
4110+
Opts.CUDAIsDevice || Opts.SYCLIsDevice ||
4111+
Opts.HLSL || T.isAMDGPU() || T.isNVPTX();
4112+
Opts.ConvergentFunctions =
4113+
Args.hasFlag(OPT_fconvergent_functions, OPT_fno_convergent_functions,
4114+
HasConvergentOperations);
41124115

41134116
Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;
41144117
if (!Opts.NoBuiltin)
@@ -4164,9 +4167,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
41644167
bool IsTargetSpecified =
41654168
Opts.OpenMPIsTargetDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
41664169

4167-
Opts.ConvergentFunctions =
4168-
Opts.ConvergentFunctions || Opts.OpenMPIsTargetDevice;
4169-
41704170
if (Opts.OpenMP || Opts.OpenMPSimd) {
41714171
if (int Version = getLastArgIntValue(
41724172
Args, OPT_fopenmp_version_EQ,

clang/test/CodeGen/nvptx_attributes.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
22
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
33

4-
// CHECK: Function Attrs: noinline nounwind optnone
4+
// CHECK: Function Attrs: convergent noinline nounwind optnone
55
// CHECK-LABEL: define {{[^@]+}}@foo
66
// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
77
// CHECK-NEXT: entry:

clang/test/CodeGenCXX/dynamic-cast-address-space.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,9 @@ const B& f(A *a) {
106106

107107

108108
//.
109-
// CHECK: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
109+
// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
110110
// CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
111-
// CHECK: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
111+
// CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
112112
// CHECK: attributes #[[ATTR3]] = { nounwind }
113113
// CHECK: attributes #[[ATTR4]] = { noreturn }
114114
//.

clang/test/OpenMP/target_parallel_for_codegen.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3782,7 +3782,7 @@ int bar(int n){
37823782
// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
37833783
// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
37843784
// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
3785-
// CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
3785+
// CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
37863786
// CHECK9-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
37873787
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
37883788
// CHECK9-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -4561,7 +4561,7 @@ int bar(int n){
45614561
// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
45624562
// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
45634563
// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
4564-
// CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
4564+
// CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
45654565
// CHECK11-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
45664566
// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
45674567
// CHECK11-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8

clang/test/OpenMP/target_parallel_for_simd_codegen.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9131,7 +9131,7 @@ int bar(int n){
91319131
// CHECK17-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
91329132
// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
91339133
// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
9134-
// CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
9134+
// CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
91359135
// CHECK17-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
91369136
// CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
91379137
// CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -9935,7 +9935,7 @@ int bar(int n){
99359935
// CHECK19-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
99369936
// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
99379937
// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
9938-
// CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
9938+
// CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
99399939
// CHECK19-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
99409940
// CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
99419941
// CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -10739,7 +10739,7 @@ int bar(int n){
1073910739
// CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
1074010740
// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
1074110741
// CHECK21-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
10742-
// CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
10742+
// CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
1074310743
// CHECK21-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
1074410744
// CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
1074510745
// CHECK21-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
@@ -11629,7 +11629,7 @@ int bar(int n){
1162911629
// CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
1163011630
// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
1163111631
// CHECK23-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
11632-
// CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
11632+
// CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
1163311633
// CHECK23-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
1163411634
// CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
1163511635
// CHECK23-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8

0 commit comments

Comments
 (0)