Skip to content

Commit fae233c

Browse files
authored
[OpenMP] Avoid initializing the KernelLaunchEnvironment if possible (llvm#73864)
If we don't have a team reduction, we don't need a kernel launch environment (for now). In that case, we can avoid the cost of initializing it.
1 parent 0737be3 commit fae233c

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -804,7 +804,9 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
804804
CGM.getTypes().ConvertTypeForMem(StaticTy);
805805
const auto &DL = CGM.getModule().getDataLayout();
806806
uint64_t ReductionDataSize =
807-
DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
807+
TeamsReductions.empty()
808+
? 0
809+
: DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
808810
CGBuilderTy &Bld = CGF.Builder;
809811
OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
810812
C.getLangOpts().OpenMPCUDAReductionBufNum);

openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -464,6 +464,10 @@ GenericKernelTy::getKernelLaunchEnvironment(
464464
if (isCtorOrDtor() || RecordReplay.isReplaying())
465465
return nullptr;
466466

467+
if (!KernelEnvironment.Configuration.ReductionDataSize ||
468+
!KernelEnvironment.Configuration.ReductionBufferLength)
469+
return reinterpret_cast<KernelLaunchEnvironmentTy *>(~0);
470+
467471
// TODO: Check if the kernel needs a launch environment.
468472
auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy),
469473
/*HostPtr=*/nullptr,
@@ -478,8 +482,7 @@ GenericKernelTy::getKernelLaunchEnvironment(
478482
/// async data transfer.
479483
auto &LocalKLE = (*AsyncInfoWrapper).KernelLaunchEnvironment;
480484
LocalKLE = KernelLaunchEnvironment;
481-
if (KernelEnvironment.Configuration.ReductionDataSize &&
482-
KernelEnvironment.Configuration.ReductionBufferLength) {
485+
{
483486
auto AllocOrErr = GenericDevice.dataAlloc(
484487
KernelEnvironment.Configuration.ReductionDataSize *
485488
KernelEnvironment.Configuration.ReductionBufferLength,

0 commit comments

Comments
 (0)