Skip to content

Commit e384e94

Browse files
committed
Revert "[HIP] Change default --gpu-max-threads-per-block value to 1024"
This reverts commit 187658b due to AMDGPU backend issues.
1 parent 79829a4 commit e384e94

File tree

4 files changed, with 5 additions and 9 deletions.

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function
240240
LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
241241
LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
242242
LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP")
243-
LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP")
243+
LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP")
244244

245245
LANGOPT(SYCL , 1, 0, "SYCL")
246246
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9059,13 +9059,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
90599059
assert(Max == 0 && "Max must be zero");
90609060
} else if (IsOpenCLKernel || IsHIPKernel) {
90619061
// By default, restrict the maximum size to a value specified by
9062-
// --gpu-max-threads-per-block=n or its default value for HIP.
9063-
const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
9064-
const unsigned DefaultMaxWorkGroupSize =
9065-
IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
9066-
: M.getLangOpts().GPUMaxThreadsPerBlock;
9062+
// --gpu-max-threads-per-block=n or its default value.
90679063
std::string AttrVal =
9068-
std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
9064+
std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
90699065
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
90709066
}
90719067

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ __global__ void num_vgpr_64() {
3939
// NAMD-NOT: "amdgpu-num-vgpr"
4040
// NAMD-NOT: "amdgpu-num-sgpr"
4141

42-
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
42+
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
4343
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
4444
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
4545
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"

clang/test/CodeGenCUDA/kernel-amdgcn.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,4 +39,4 @@ int main() {
3939
launch((void*)D.Empty());
4040
return 0;
4141
}
42-
// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
42+
// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"

0 commit comments

Comments
 (0)