Skip to content

Commit 733ad88

Browse files
committed
Update aspect lists in the device config table for AMD and CUDA targets
1 parent df577cc commit 733ad88

File tree

1 file changed

+74
-47
lines changed

1 file changed

+74
-47
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 74 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,11 @@ class TargetInfo<string targetName, list<Aspect> aspectList, list<int> subGroupS
110110
// This definition exists solely to test that the aspect list defined here matches the one in the SYCL RT.
111111
def : TargetInfo<"__TestAspectList",
112112
[AspectCpu, AspectGpu, AspectAccelerator, AspectCustom, AspectFp16, AspectFp64, AspectImage, AspectOnline_compiler,
113-
AspectOnline_linker, AspectQueue_profiling, AspectUsm_device_allocations, AspectUsm_host_allocations,
114-
AspectUsm_shared_allocations, AspectUsm_system_allocations, AspectExt_intel_pci_address,
113+
AspectOnline_linker, AspectQueue_profiling, AspectExt_intel_pci_address,
115114
AspectExt_intel_gpu_eu_count, AspectExt_intel_gpu_eu_simd_width, AspectExt_intel_gpu_slices,
116115
AspectExt_intel_gpu_subslices_per_slice, AspectExt_intel_gpu_eu_count_per_subslice,
117116
AspectExt_intel_max_mem_bandwidth, AspectExt_intel_mem_channel, AspectUsm_atomic_host_allocations,
118-
AspectUsm_atomic_shared_allocations, AspectAtomic64, AspectExt_intel_device_info_uuid, AspectExt_oneapi_srgb,
117+
AspectAtomic64, AspectExt_intel_device_info_uuid, AspectExt_oneapi_srgb,
119118
AspectExt_oneapi_native_assert, AspectHost_debuggable, AspectExt_intel_gpu_hw_threads_per_eu,
120119
AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_bfloat16_math_functions, AspectExt_intel_free_memory,
121120
AspectExt_intel_device_id, AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectEmulated,
@@ -165,26 +164,36 @@ class CudaTargetInfo<string targetName, list<Aspect> aspectList, int subGroupSiz
165164
assert !eq(subGroupSize, 32), "sub-group size for Cuda must be equal to 32 and not " # subGroupSize # ".";
166165
}
167166

168-
defvar CudaMinAspects = [AspectGpu, AspectFp16, AspectFp64, AspectQueue_profiling,
169-
AspectUsm_device_allocations, AspectUsm_host_allocations,
170-
AspectUsm_shared_allocations, AspectAtomic64];
167+
defvar CudaMinAspects = !listconcat(AllUSMAspects, [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker,
168+
AspectQueue_profiling, AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth, AspectExt_intel_memory_bus_width,
169+
AspectExt_intel_device_info_uuid, AspectExt_oneapi_native_assert, AspectExt_intel_free_memory, AspectExt_intel_device_id,
170+
AspectExt_intel_memory_clock_rate, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group,
171+
AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]);
171172
// Bindless image aspects are only partially supported on CUDA and are currently disabled by default.
172-
defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import];
173+
defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images, AspectExt_oneapi_bindless_images_shared_usm,
174+
AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import,
175+
AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy,
176+
AspectExt_oneapi_mipmap_level_reference, AspectExt_oneapi_cubemap, AspectExt_oneapi_cubemap_seamless_filtering];
173177

174178
def : CudaTargetInfo<"nvidia_gpu_sm_50", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
175179
def : CudaTargetInfo<"nvidia_gpu_sm_52", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
176-
def : CudaTargetInfo<"nvidia_gpu_sm_53", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
177-
def : CudaTargetInfo<"nvidia_gpu_sm_60", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
178-
def : CudaTargetInfo<"nvidia_gpu_sm_61", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
179-
def : CudaTargetInfo<"nvidia_gpu_sm_62", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
180-
def : CudaTargetInfo<"nvidia_gpu_sm_70", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
181-
def : CudaTargetInfo<"nvidia_gpu_sm_72", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
182-
def : CudaTargetInfo<"nvidia_gpu_sm_75", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
183-
def : CudaTargetInfo<"nvidia_gpu_sm_80", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
184-
def : CudaTargetInfo<"nvidia_gpu_sm_86", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
185-
def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
186-
def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
187-
def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
180+
def : CudaTargetInfo<"nvidia_gpu_sm_53", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16])>;
181+
def : CudaTargetInfo<"nvidia_gpu_sm_60", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
182+
def : CudaTargetInfo<"nvidia_gpu_sm_61", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
183+
def : CudaTargetInfo<"nvidia_gpu_sm_62", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
184+
def : CudaTargetInfo<"nvidia_gpu_sm_70", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
185+
def : CudaTargetInfo<"nvidia_gpu_sm_72", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
186+
def : CudaTargetInfo<"nvidia_gpu_sm_75", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
187+
def : CudaTargetInfo<"nvidia_gpu_sm_80", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
188+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
189+
def : CudaTargetInfo<"nvidia_gpu_sm_86", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
190+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
191+
def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
192+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
193+
def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
194+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
195+
def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
196+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
188197

189198
//
190199
// HIP / AMDGPU device aspects
@@ -199,12 +208,34 @@ class HipTargetInfo<string targetName, list<Aspect> aspectList, list<int> subGro
199208
// DPCPP does not support AMD targets prior to the gfx7 (GCN2) family.
200209
defvar HipSubgroupSizesGCN2 = [16]; // gfx7
201210
defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes
202-
defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906, gfx90c, GCN 5th gen is also known as Vega
211+
defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"), gfx90c GCN5.1 (known as "Vega 7nm")
203212
defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available)
204213
defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a (encapsulates CDNA1..2)
205214

206-
defvar HipMinAspects = [AspectGpu, AspectQueue_profiling, AspectUsm_device_allocations, AspectUsm_host_allocations];
215+
defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
216+
AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth, AspectExt_intel_device_id,
217+
AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectExt_intel_free_memory];
207218

219+
// The following AMDGCN targets are grouped by their level of ROCm driver support:
220+
//
221+
// Officially supported:
222+
def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects,
223+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]), HipSubgroupSizesCDNA>;
224+
def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects,
225+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]),
226+
HipSubgroupSizesCDNA>;
227+
def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects,
228+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
229+
HipSubgroupSizesCDNA>;
230+
def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects,
231+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
232+
HipSubgroupSizesRDNA>;
233+
def : HipTargetInfo<"amd_gpu_gfx1100", !listconcat(HipMinAspects, AllUSMAspects,
234+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
235+
HipSubgroupSizesRDNA>;
236+
// Deprecated support:
237+
def : HipTargetInfo<"amd_gpu_gfx906", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesGCN5>;
238+
// Unsupported (or unofficially supported):
208239
def : HipTargetInfo<"amd_gpu_gfx700", HipMinAspects, HipSubgroupSizesGCN2>;
209240
def : HipTargetInfo<"amd_gpu_gfx701", HipMinAspects, HipSubgroupSizesGCN2>;
210241
def : HipTargetInfo<"amd_gpu_gfx702", HipMinAspects, HipSubgroupSizesGCN2>;
@@ -216,30 +247,26 @@ def : HipTargetInfo<"amd_gpu_gfx810", HipMinAspects, HipSubgroupSizesGCN3>;
216247
def : HipTargetInfo<"amd_gpu_gfx900", HipMinAspects, HipSubgroupSizesGCN5>;
217248
def : HipTargetInfo<"amd_gpu_gfx902", HipMinAspects, HipSubgroupSizesGCN5>;
218249
def : HipTargetInfo<"amd_gpu_gfx904", HipMinAspects, HipSubgroupSizesGCN5>;
219-
def : HipTargetInfo<"amd_gpu_gfx906", HipMinAspects, HipSubgroupSizesGCN5>;
220-
def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
221250
def : HipTargetInfo<"amd_gpu_gfx909", HipMinAspects, HipSubgroupSizesGCN5>;
222-
def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
223-
def : HipTargetInfo<"amd_gpu_gfx90c", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesGCN5>;
224-
def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
225-
def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // TBA
226-
def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // TBA
227-
def : HipTargetInfo<"amd_gpu_gfx1010", HipMinAspects, HipSubgroupSizesRDNA>;
228-
def : HipTargetInfo<"amd_gpu_gfx1011", HipMinAspects, HipSubgroupSizesRDNA>;
229-
def : HipTargetInfo<"amd_gpu_gfx1012", HipMinAspects, HipSubgroupSizesRDNA>;
230-
def : HipTargetInfo<"amd_gpu_gfx1013", HipMinAspects, HipSubgroupSizesRDNA>;
231-
def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesRDNA>;
232-
def : HipTargetInfo<"amd_gpu_gfx1031", HipMinAspects, HipSubgroupSizesRDNA>;
233-
def : HipTargetInfo<"amd_gpu_gfx1032", HipMinAspects, HipSubgroupSizesRDNA>;
234-
def : HipTargetInfo<"amd_gpu_gfx1033", HipMinAspects, HipSubgroupSizesRDNA>;
235-
def : HipTargetInfo<"amd_gpu_gfx1034", HipMinAspects, HipSubgroupSizesRDNA>;
236-
def : HipTargetInfo<"amd_gpu_gfx1035", HipMinAspects, HipSubgroupSizesRDNA>;
237-
def : HipTargetInfo<"amd_gpu_gfx1036", HipMinAspects, HipSubgroupSizesRDNA>;
238-
def : HipTargetInfo<"amd_gpu_gfx1100", HipMinAspects, HipSubgroupSizesRDNA>;
239-
def : HipTargetInfo<"amd_gpu_gfx1101", HipMinAspects, HipSubgroupSizesRDNA>;
240-
def : HipTargetInfo<"amd_gpu_gfx1102", HipMinAspects, HipSubgroupSizesRDNA>;
241-
def : HipTargetInfo<"amd_gpu_gfx1103", HipMinAspects, HipSubgroupSizesRDNA>;
242-
def : HipTargetInfo<"amd_gpu_gfx1150", HipMinAspects, HipSubgroupSizesRDNA>;
243-
def : HipTargetInfo<"amd_gpu_gfx1151", HipMinAspects, HipSubgroupSizesRDNA>;
244-
def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // TBA
245-
def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // TBA
251+
def : HipTargetInfo<"amd_gpu_gfx90c", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesGCN5>;
252+
def : HipTargetInfo<"amd_gpu_gfx1010", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
253+
def : HipTargetInfo<"amd_gpu_gfx1011", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
254+
def : HipTargetInfo<"amd_gpu_gfx1012", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
255+
def : HipTargetInfo<"amd_gpu_gfx1013", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
256+
def : HipTargetInfo<"amd_gpu_gfx1031", !listconcat(!listremove(HipMinAspects, [AspectExt_intel_free_memory]), AllUSMAspects),
257+
HipSubgroupSizesRDNA>;
258+
def : HipTargetInfo<"amd_gpu_gfx1032", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
259+
def : HipTargetInfo<"amd_gpu_gfx1033", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
260+
def : HipTargetInfo<"amd_gpu_gfx1034", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
261+
def : HipTargetInfo<"amd_gpu_gfx1035", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
262+
def : HipTargetInfo<"amd_gpu_gfx1036", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
263+
def : HipTargetInfo<"amd_gpu_gfx1101", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
264+
def : HipTargetInfo<"amd_gpu_gfx1102", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
265+
def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
266+
def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
267+
def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
268+
// TBA
269+
def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // CDNA 3
270+
def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // CDNA 3
271+
def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4
272+
def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4

0 commit comments

Comments
 (0)