Skip to content

Commit 187658b

Browse files
committed
Recommit "[HIP] Change default --gpu-max-threads-per-block value to 1024"
Recommit 04abbb3
1 parent b9f2b3b commit 187658b

File tree

4 files changed, with 9 additions and 5 deletions.

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function
240240
LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
241241
LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
242242
LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP")
243-
LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP")
243+
LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP")
244244

245245
LANGOPT(SYCL , 1, 0, "SYCL")
246246
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9060,9 +9060,13 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
90609060
assert(Max == 0 && "Max must be zero");
90619061
} else if (IsOpenCLKernel || IsHIPKernel) {
90629062
// By default, restrict the maximum size to a value specified by
9063-
// --gpu-max-threads-per-block=n or its default value.
9063+
// --gpu-max-threads-per-block=n or its default value for HIP.
9064+
const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
9065+
const unsigned DefaultMaxWorkGroupSize =
9066+
IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
9067+
: M.getLangOpts().GPUMaxThreadsPerBlock;
90649068
std::string AttrVal =
9065-
std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
9069+
std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
90669070
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
90679071
}
90689072

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ __global__ void num_vgpr_64() {
3939
// NAMD-NOT: "amdgpu-num-vgpr"
4040
// NAMD-NOT: "amdgpu-num-sgpr"
4141

42-
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
42+
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
4343
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
4444
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
4545
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"

clang/test/CodeGenCUDA/kernel-amdgcn.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,4 +39,4 @@ int main() {
3939
launch((void*)D.Empty());
4040
return 0;
4141
}
42-
// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
42+
// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"

0 commit comments

Comments
 (0)