Skip to content

Commit c7ff25b

Browse files
dhruvachakronlieb
authored and committed
[OpenMP] Allow low tripcount optimization for Xteam Reductions.
Change-Id: I4bbd46fade68eaea3adf7ea4089ea32bb51e5319
1 parent f51d8b7 commit c7ff25b

File tree

1 file changed

+19
-4
lines changed
  • openmp/libomptarget/plugins-nextgen/amdgpu/src

1 file changed

+19
-4
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -770,8 +770,15 @@ struct AMDGPUKernelTy : public GenericKernelTy {
770770
uint64_t LoopTripCount, uint32_t ThreadLimitClause[3]) const override {
771771
uint32_t NumThreads = BlockSize;
772772

773-
// If there is an override already, do nothing
774-
if (NumThreads != GenericDevice.getDefaultNumThreads() &&
773+
// If there is an override already, do nothing. Note the different
774+
// default for Xteam Reductions.
775+
if (!isXTeamReductionsMode() &&
776+
NumThreads != GenericDevice.getDefaultNumThreads() &&
777+
NumThreads != ConstWGSize)
778+
return std::make_pair(false, NumThreads);
779+
780+
if (isXTeamReductionsMode() &&
781+
NumThreads != llvm::omp::xteam_red::DefaultBlockSize &&
775782
NumThreads != ConstWGSize)
776783
return std::make_pair(false, NumThreads);
777784

@@ -788,13 +795,21 @@ struct AMDGPUKernelTy : public GenericKernelTy {
788795
if ((ThreadLimitClause[0] > 0) && (ThreadLimitClause[0] != (uint32_t)-1))
789796
return std::make_pair(false, NumThreads);
790797

791-
// If generic, generic-SPMD, or Xteam reduction kernel, do nothing.
792-
if (isGenericMode() || isGenericSPMDMode() || isXTeamReductionsMode())
798+
// If generic or generic-SPMD kernel, do nothing.
799+
if (isGenericMode() || isGenericSPMDMode())
793800
return std::make_pair(false, NumThreads);
794801

795802
// Reduce the blocksize as long as it is above the tunable limit.
796803
while (NumThreads > GenericDevice.getOMPXSmallBlockSize())
797804
NumThreads >>= 1;
805+
806+
if (NumThreads == 0)
807+
return std::make_pair(false, BlockSize);
808+
809+
if (isXTeamReductionsMode())
810+
return std::make_pair(true,
811+
llvm::omp::getBlockSizeAsPowerOfTwo(NumThreads));
812+
798813
return std::make_pair(true, NumThreads);
799814
}
800815

0 commit comments

Comments
 (0)