@@ -321,38 +321,49 @@ defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"
321
321
defvar HipSubgroupSizesRDNA = [32]; // gfx10-gfx11 (encapsulates RDNA1..3); wave64 mode is available but not used.
322
322
defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a, gfx940-gfx942 (encapsulates CDNA1..3)
323
323
324
- defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
325
- AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth, AspectExt_intel_device_id,
326
- AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectExt_intel_free_memory];
324
+ defvar HipMinAspects = [AspectGpu, AspectFp16, AspectFp64,
325
+ AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
326
+ AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth,
327
+ AspectExt_intel_device_id, AspectExt_intel_memory_clock_rate,
328
+ AspectExt_intel_memory_bus_width, AspectExt_intel_free_memory];
327
329
330
+ defvar HipUSMAspects = !listremove(AllUSMAspects, [AspectUsm_system_allocations]);
331
+ defvar HipGraphAspects = [AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph];
328
332
// The following AMDGCN targets are ordered based on their ROCm driver support:
329
333
//
330
334
// Officially supported:
331
- def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects,
332
- [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]), HipSubgroupSizesCDNA>;
333
- def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects,
334
- [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]),
335
+ def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(
336
+ HipMinAspects, HipUSMAspects, HipGraphAspects,
337
+ [AspectExt_intel_device_info_uuid]), HipSubgroupSizesCDNA>;
338
+ def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(
339
+ HipMinAspects, HipUSMAspects, HipGraphAspects,
340
+ [AspectAtomic64, AspectExt_intel_device_info_uuid, AspectExt_oneapi_native_assert]),
335
341
HipSubgroupSizesCDNA>;
336
342
// TODO: Need to verify whether device-side asserts (oneapi_native_assert) are
337
343
// now working for the new CDNA3 gfx940, gfx941, gfx942 GPUs and fixed for the
338
344
// other supported RDNA GPUs, gfx1030 (RDNA2) and gfx1100 (RDNA3).
339
- def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects,
340
- [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
345
+ def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(
346
+ HipMinAspects, HipUSMAspects, HipGraphAspects,
347
+ [AspectExt_intel_device_info_uuid]),
341
348
HipSubgroupSizesCDNA>;
342
- def : HipTargetInfo<"amd_gpu_gfx941", !listconcat(HipMinAspects, AllUSMAspects,
343
- [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
349
+ def : HipTargetInfo<"amd_gpu_gfx941", !listconcat(
350
+ HipMinAspects, HipUSMAspects, HipGraphAspects,
351
+ [AspectExt_intel_device_info_uuid]),
344
352
HipSubgroupSizesCDNA>;
345
- def : HipTargetInfo<"amd_gpu_gfx942", !listconcat(HipMinAspects, AllUSMAspects,
346
- [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
353
+ def : HipTargetInfo<"amd_gpu_gfx942", !listconcat(
354
+ HipMinAspects, HipUSMAspects, HipGraphAspects,
355
+ [AspectExt_intel_device_info_uuid]),
347
356
HipSubgroupSizesCDNA>;
348
- def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects,
349
- [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
357
+ def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(
358
+ HipMinAspects, HipUSMAspects, HipGraphAspects,
359
+ [AspectAtomic64, AspectExt_intel_device_info_uuid]),
350
360
HipSubgroupSizesRDNA>;
351
- def : HipTargetInfo<"amd_gpu_gfx1100", !listconcat(HipMinAspects, AllUSMAspects,
352
- [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
361
+ def : HipTargetInfo<"amd_gpu_gfx1100", !listconcat(
362
+ HipMinAspects, HipUSMAspects, HipGraphAspects,
363
+ [AspectExt_intel_device_info_uuid]),
353
364
HipSubgroupSizesRDNA>;
354
365
// Deprecated support:
355
- def : HipTargetInfo<"amd_gpu_gfx906", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesGCN5>;
366
+ def : HipTargetInfo<"amd_gpu_gfx906", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesGCN5>;
356
367
// Unsupported (or unofficially supported):
357
368
def : HipTargetInfo<"amd_gpu_gfx700", HipMinAspects, HipSubgroupSizesGCN2>;
358
369
def : HipTargetInfo<"amd_gpu_gfx701", HipMinAspects, HipSubgroupSizesGCN2>;
@@ -369,23 +380,23 @@ def : HipTargetInfo<"amd_gpu_gfx900", HipMinAspects, HipSubgroupSizesGCN5>;
369
380
def : HipTargetInfo<"amd_gpu_gfx902", HipMinAspects, HipSubgroupSizesGCN5>;
370
381
def : HipTargetInfo<"amd_gpu_gfx904", HipMinAspects, HipSubgroupSizesGCN5>;
371
382
def : HipTargetInfo<"amd_gpu_gfx909", HipMinAspects, HipSubgroupSizesGCN5>;
372
- def : HipTargetInfo<"amd_gpu_gfx90c", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesGCN5>;
373
- def : HipTargetInfo<"amd_gpu_gfx1010", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
374
- def : HipTargetInfo<"amd_gpu_gfx1011", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
375
- def : HipTargetInfo<"amd_gpu_gfx1012", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
376
- def : HipTargetInfo<"amd_gpu_gfx1013", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
377
- def : HipTargetInfo<"amd_gpu_gfx1031", !listconcat(!listremove(HipMinAspects, [AspectExt_intel_free_memory]), AllUSMAspects ),
383
+ def : HipTargetInfo<"amd_gpu_gfx90c", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesGCN5>;
384
+ def : HipTargetInfo<"amd_gpu_gfx1010", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
385
+ def : HipTargetInfo<"amd_gpu_gfx1011", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
386
+ def : HipTargetInfo<"amd_gpu_gfx1012", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
387
+ def : HipTargetInfo<"amd_gpu_gfx1013", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
388
+ def : HipTargetInfo<"amd_gpu_gfx1031", !listconcat(!listremove(HipMinAspects, [AspectExt_intel_free_memory]), HipUSMAspects ),
378
389
HipSubgroupSizesRDNA>;
379
- def : HipTargetInfo<"amd_gpu_gfx1032", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
380
- def : HipTargetInfo<"amd_gpu_gfx1033", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
381
- def : HipTargetInfo<"amd_gpu_gfx1034", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
382
- def : HipTargetInfo<"amd_gpu_gfx1035", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
383
- def : HipTargetInfo<"amd_gpu_gfx1036", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
384
- def : HipTargetInfo<"amd_gpu_gfx1101", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
385
- def : HipTargetInfo<"amd_gpu_gfx1102", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
386
- def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
387
- def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
388
- def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects ), HipSubgroupSizesRDNA>;
390
+ def : HipTargetInfo<"amd_gpu_gfx1032", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
391
+ def : HipTargetInfo<"amd_gpu_gfx1033", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
392
+ def : HipTargetInfo<"amd_gpu_gfx1034", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
393
+ def : HipTargetInfo<"amd_gpu_gfx1035", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
394
+ def : HipTargetInfo<"amd_gpu_gfx1036", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
395
+ def : HipTargetInfo<"amd_gpu_gfx1101", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
396
+ def : HipTargetInfo<"amd_gpu_gfx1102", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
397
+ def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
398
+ def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
399
+ def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, HipUSMAspects ), HipSubgroupSizesRDNA>;
389
400
// TBA
390
401
def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4
391
402
def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4
0 commit comments