@@ -275,7 +275,11 @@ class HipTargetInfo<string targetName, list<Aspect> aspectList, list<int> subGro
275
275
defvar HipSubgroupSizesGCN2 = [16]; // gfx7
276
276
defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes
277
277
defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"), gfx90c GCN5.1 (known as "Vega 7nm")
278
- defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available)
278
+ // According to the "Accelerator and GPU hardware specifications table" docs,
279
+ // (see: https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html)
280
+ // the ROCm driver selects wave32 mode for the gfx10 and gfx11 family of GPUs.
281
+ // Also, see relevant ROCm issue: https://github.com/ROCm/hipamd/issues/59
282
+ defvar HipSubgroupSizesRDNA = [32]; // gfx10-gfx11 (encapsulates RDNA1..3); wave64 mode is available but not used.
279
283
defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a (encapsulates CDNA1..2)
280
284
281
285
defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
@@ -290,9 +294,18 @@ def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects,
290
294
def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects,
291
295
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]),
292
296
HipSubgroupSizesCDNA>;
297
+ // TODO: Need to verify whether device-side asserts (oneapi_native_assert) are
298
+ // now working for the new CDNA3 gfx940, gfx941, gfx942 GPUs and fixed for the
299
+ // other supported RDNA GPUs, gfx1030 (RDNA2) and gfx1100 (RDNA3).
293
300
def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects,
294
301
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
295
302
HipSubgroupSizesCDNA>;
303
+ def : HipTargetInfo<"amd_gpu_gfx941", !listconcat(HipMinAspects, AllUSMAspects,
304
+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
305
+ HipSubgroupSizesCDNA>;
306
+ def : HipTargetInfo<"amd_gpu_gfx942", !listconcat(HipMinAspects, AllUSMAspects,
307
+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
308
+ HipSubgroupSizesCDNA>;
296
309
def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects,
297
310
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
298
311
HipSubgroupSizesRDNA>;
@@ -332,7 +345,5 @@ def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects)
332
345
def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
333
346
def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
334
347
// TBA
335
- def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // CDNA 3
336
- def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // CDNA 3
337
348
def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4
338
349
def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4
0 commit comments