Skip to content

Commit 04abbb3

Browse files
committed
[HIP] Change default --gpu-max-threads-per-block value to 1024
Differential Revision: https://reviews.llvm.org/D76795
1 parent 3eb4bf1 commit 04abbb3

File tree

4 files changed: 9 additions and 5 deletions.

4 files changed

+9
-5
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function
 LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
 LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
 LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP")
-LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP")
+LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP")

 LANGOPT(SYCL , 1, 0, "SYCL")
 LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8815,9 +8815,13 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
 assert(Max == 0 && "Max must be zero");
 } else if (IsOpenCLKernel || IsHIPKernel) {
 // By default, restrict the maximum size to a value specified by
-// --gpu-max-threads-per-block=n or its default value.
+// --gpu-max-threads-per-block=n or its default value for HIP.
+const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
+const unsigned DefaultMaxWorkGroupSize =
+IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
+: M.getLangOpts().GPUMaxThreadsPerBlock;
 std::string AttrVal =
-std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
+std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
 F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
 }

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ __global__ void num_vgpr_64() {
 // NAMD-NOT: "amdgpu-num-vgpr"
 // NAMD-NOT: "amdgpu-num-sgpr"

-// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
+// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
 // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
 // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
 // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"

clang/test/CodeGenCUDA/kernel-amdgcn.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@ int main() {
 launch((void*)D.Empty());
 return 0;
 }
-// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
+// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"

0 commit comments

Comments
 (0)