Skip to content

Commit 8c5555c

Browse files
committed
[OPENMP][NVPTX] Mark more functions as always_inline for better
performance. Internally generated functions must be marked as always_inline in most cases. This patch marks some extra reduction functions and outlined parallel functions as always_inline for better performance, but only if optimization is requested. llvm-svn: 361269
1 parent 78c3f58 commit 8c5555c

23 files changed

+305
-203
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,9 +1274,11 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
12741274
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
12751275
Name, &CGM.getModule());
12761276
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1277-
Fn->removeFnAttr(llvm::Attribute::NoInline);
1278-
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1279-
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1277+
if (CGM.getLangOpts().Optimize) {
1278+
Fn->removeFnAttr(llvm::Attribute::NoInline);
1279+
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1280+
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1281+
}
12801282
CodeGenFunction CGF(CGM);
12811283
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
12821284
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
@@ -4671,9 +4673,11 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
46714673
&CGM.getModule());
46724674
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
46734675
TaskPrivatesMapFnInfo);
4674-
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4675-
TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4676-
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4676+
if (CGM.getLangOpts().Optimize) {
4677+
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4678+
TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4679+
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4680+
}
46774681
CodeGenFunction CGF(CGM);
46784682
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
46794683
TaskPrivatesMapFnInfo, Args, Loc, Loc);

clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1929,6 +1929,11 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
19291929
auto *OutlinedFun =
19301930
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
19311931
D, ThreadIDVar, InnermostKind, CodeGen));
1932+
if (CGM.getLangOpts().Optimize) {
1933+
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
1934+
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
1935+
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
1936+
}
19321937
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
19331938
IsInTTDRegion = PrevIsInTTDRegion;
19341939
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
@@ -2045,9 +2050,11 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
20452050
CodeGen.setAction(Action);
20462051
llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
20472052
D, ThreadIDVar, InnermostKind, CodeGen);
2048-
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
2049-
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
2050-
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
2053+
if (CGM.getLangOpts().Optimize) {
2054+
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
2055+
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
2056+
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
2057+
}
20512058

20522059
return OutlinedFun;
20532060
}
@@ -3422,6 +3429,12 @@ static llvm::Function *emitShuffleAndReduceFunction(
34223429
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
34233430
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
34243431
Fn->setDoesNotRecurse();
3432+
if (CGM.getLangOpts().Optimize) {
3433+
Fn->removeFnAttr(llvm::Attribute::NoInline);
3434+
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
3435+
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
3436+
}
3437+
34253438
CodeGenFunction CGF(CGM);
34263439
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
34273440

clang/test/OpenMP/declare_target_codegen_globalization.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
2-
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
2+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s
33
// expected-no-diagnostics
44

55
int foo(int &a) { return a; }

0 commit comments

Comments
 (0)