Skip to content

Commit 7f1ab96

Browse files
authored
[SYCL][Driver] Update support for HIP/AMDGCN targets in DeviceConfigFile (#15414)
This change adds new supported targets, gfx941 and gfx942. It also fixes the supported subgroup size config value for AMD RDNA GPUs: the ROCm driver does not support wave64 mode in HIP for the gfx10 and gfx11 families of GPUs, which are based on the RDNA architecture, because the wavefront/cross-lane functions only work with wave32.
1 parent c7c4130 commit 7f1ab96

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@
5050
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
5151
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx940 \
5252
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
53+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx941 \
54+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
55+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx942 \
56+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
5357
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
5458
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx1010 \
5559
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
@@ -156,6 +160,12 @@
156160
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx940 \
157161
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
158162
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
163+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx941 \
164+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
165+
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
166+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx942 \
167+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
168+
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
159169
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx1010 \
160170
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
161171
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,11 @@ class HipTargetInfo<string targetName, list<Aspect> aspectList, list<int> subGro
275275
defvar HipSubgroupSizesGCN2 = [16]; // gfx7
276276
defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes
277277
defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"), gfx90c GCN5.1 (known as "Vega 7nm")
278-
defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available)
278+
// According to the "Accelerator and GPU hardware specifications table" docs,
279+
// (see: https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html)
280+
// the ROCm driver selects wave32 mode for the gfx10 and gfx11 family of GPUs.
281+
// Also, see relevant ROCm issue: https://github.com/ROCm/hipamd/issues/59
282+
defvar HipSubgroupSizesRDNA = [32]; // gfx10-gfx11 (encapsulates RDNA1..3); wave64 mode is available but not used.
279283
defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a, gfx940-gfx942 (encapsulates CDNA1..3)
280284

281285
defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
@@ -290,9 +294,18 @@ def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects,
290294
def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects,
291295
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]),
292296
HipSubgroupSizesCDNA>;
297+
// TODO: Need to verify whether device-side asserts (oneapi_native_assert) are
298+
// now working for the new CDNA3 gfx940, gfx941, gfx942 GPUs and fixed for the
299+
// other supported RDNA GPUs, gfx1030 (RDNA2) and gfx1100 (RDNA3).
293300
def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects,
294301
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
295302
HipSubgroupSizesCDNA>;
303+
def : HipTargetInfo<"amd_gpu_gfx941", !listconcat(HipMinAspects, AllUSMAspects,
304+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
305+
HipSubgroupSizesCDNA>;
306+
def : HipTargetInfo<"amd_gpu_gfx942", !listconcat(HipMinAspects, AllUSMAspects,
307+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
308+
HipSubgroupSizesCDNA>;
296309
def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects,
297310
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
298311
HipSubgroupSizesRDNA>;
@@ -332,7 +345,5 @@ def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects)
332345
def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
333346
def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
334347
// TBA
335-
def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // CDNA 3
336-
def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // CDNA 3
337348
def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4
338349
def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4

0 commit comments

Comments
 (0)