@@ -110,12 +110,11 @@ class TargetInfo<string targetName, list<Aspect> aspectList, list<int> subGroupS
110
110
// This definition serves the sole purpose of testing whether the aspect lists defined here and in the SYCL RT match.
111
111
def : TargetInfo<"__TestAspectList",
112
112
[AspectCpu, AspectGpu, AspectAccelerator, AspectCustom, AspectFp16, AspectFp64, AspectImage, AspectOnline_compiler,
113
- AspectOnline_linker, AspectQueue_profiling, AspectUsm_device_allocations, AspectUsm_host_allocations,
114
- AspectUsm_shared_allocations, AspectUsm_system_allocations, AspectExt_intel_pci_address,
113
+ AspectOnline_linker, AspectQueue_profiling, AspectExt_intel_pci_address,
115
114
AspectExt_intel_gpu_eu_count, AspectExt_intel_gpu_eu_simd_width, AspectExt_intel_gpu_slices,
116
115
AspectExt_intel_gpu_subslices_per_slice, AspectExt_intel_gpu_eu_count_per_subslice,
117
116
AspectExt_intel_max_mem_bandwidth, AspectExt_intel_mem_channel, AspectUsm_atomic_host_allocations,
118
- AspectUsm_atomic_shared_allocations, AspectAtomic64, AspectExt_intel_device_info_uuid, AspectExt_oneapi_srgb,
117
+ AspectAtomic64, AspectExt_intel_device_info_uuid, AspectExt_oneapi_srgb,
119
118
AspectExt_oneapi_native_assert, AspectHost_debuggable, AspectExt_intel_gpu_hw_threads_per_eu,
120
119
AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_bfloat16_math_functions, AspectExt_intel_free_memory,
121
120
AspectExt_intel_device_id, AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectEmulated,
@@ -165,26 +164,36 @@ class CudaTargetInfo<string targetName, list<Aspect> aspectList, int subGroupSiz
165
164
assert !eq(subGroupSize, 32), "sub-group size for Cuda must be equal to 32 and not " # subGroupSize # ".";
166
165
}
167
166
168
- defvar CudaMinAspects = [AspectGpu, AspectFp16, AspectFp64, AspectQueue_profiling,
169
- AspectUsm_device_allocations, AspectUsm_host_allocations,
170
- AspectUsm_shared_allocations, AspectAtomic64];
167
+ defvar CudaMinAspects = !listconcat(AllUSMAspects, [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker,
168
+ AspectQueue_profiling, AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth, AspectExt_intel_memory_bus_width,
169
+ AspectExt_intel_device_info_uuid, AspectExt_oneapi_native_assert, AspectExt_intel_free_memory, AspectExt_intel_device_id,
170
+ AspectExt_intel_memory_clock_rate, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group,
171
+ AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]);
171
172
// Bindless images aspects are partially supported on CUDA and disabled by default at the moment.
172
- defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import];
173
+ defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images, AspectExt_oneapi_bindless_images_shared_usm,
174
+ AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import,
175
+ AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy,
176
+ AspectExt_oneapi_mipmap_level_reference, AspectExt_oneapi_cubemap, AspectExt_oneapi_cubemap_seamless_filtering];
173
177
174
178
def : CudaTargetInfo<"nvidia_gpu_sm_50", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
175
179
def : CudaTargetInfo<"nvidia_gpu_sm_52", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
176
- def : CudaTargetInfo<"nvidia_gpu_sm_53", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
177
- def : CudaTargetInfo<"nvidia_gpu_sm_60", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
178
- def : CudaTargetInfo<"nvidia_gpu_sm_61", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
179
- def : CudaTargetInfo<"nvidia_gpu_sm_62", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
180
- def : CudaTargetInfo<"nvidia_gpu_sm_70", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
181
- def : CudaTargetInfo<"nvidia_gpu_sm_72", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
182
- def : CudaTargetInfo<"nvidia_gpu_sm_75", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
183
- def : CudaTargetInfo<"nvidia_gpu_sm_80", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
184
- def : CudaTargetInfo<"nvidia_gpu_sm_86", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
185
- def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
186
- def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
187
- def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
180
+ def : CudaTargetInfo<"nvidia_gpu_sm_53", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16])>;
181
+ def : CudaTargetInfo<"nvidia_gpu_sm_60", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
182
+ def : CudaTargetInfo<"nvidia_gpu_sm_61", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
183
+ def : CudaTargetInfo<"nvidia_gpu_sm_62", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
184
+ def : CudaTargetInfo<"nvidia_gpu_sm_70", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
185
+ def : CudaTargetInfo<"nvidia_gpu_sm_72", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
186
+ def : CudaTargetInfo<"nvidia_gpu_sm_75", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
187
+ def : CudaTargetInfo<"nvidia_gpu_sm_80", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
188
+ [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
189
+ def : CudaTargetInfo<"nvidia_gpu_sm_86", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
190
+ [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
191
+ def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
192
+ [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
193
+ def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
194
+ [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
195
+ def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
196
+ [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
188
197
189
198
//
190
199
// HIP / AMDGPU device aspects
@@ -199,12 +208,34 @@ class HipTargetInfo<string targetName, list<Aspect> aspectList, list<int> subGro
199
208
// DPCPP does not support AMD targets prior to the gfx7 (GCN2) family.
200
209
defvar HipSubgroupSizesGCN2 = [16]; // gfx7
201
210
defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes
202
- defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906, gfx90c, GCN 5th gen is also known as Vega
211
+ defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"), gfx90c GCN5.1 (known as "Vega 7nm")
203
212
defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available)
204
213
defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a (encapsulates CDNA1..2)
205
214
206
- defvar HipMinAspects = [AspectGpu, AspectQueue_profiling, AspectUsm_device_allocations, AspectUsm_host_allocations];
215
+ defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
216
+ AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth, AspectExt_intel_device_id,
217
+ AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectExt_intel_free_memory];
207
218
219
+ // The following AMDGCN targets are ordered based on their ROCm driver support:
220
+ //
221
+ // Officially supported:
222
+ def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects,
223
+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]), HipSubgroupSizesCDNA>;
224
+ def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects,
225
+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]),
226
+ HipSubgroupSizesCDNA>;
227
+ def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects,
228
+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
229
+ HipSubgroupSizesCDNA>;
230
+ def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects,
231
+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
232
+ HipSubgroupSizesRDNA>;
233
+ def : HipTargetInfo<"amd_gpu_gfx1100", !listconcat(HipMinAspects, AllUSMAspects,
234
+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
235
+ HipSubgroupSizesRDNA>;
236
+ // Deprecated support:
237
+ def : HipTargetInfo<"amd_gpu_gfx906", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesGCN5>;
238
+ // Unsupported (or unofficially supported):
208
239
def : HipTargetInfo<"amd_gpu_gfx700", HipMinAspects, HipSubgroupSizesGCN2>;
209
240
def : HipTargetInfo<"amd_gpu_gfx701", HipMinAspects, HipSubgroupSizesGCN2>;
210
241
def : HipTargetInfo<"amd_gpu_gfx702", HipMinAspects, HipSubgroupSizesGCN2>;
@@ -216,30 +247,26 @@ def : HipTargetInfo<"amd_gpu_gfx810", HipMinAspects, HipSubgroupSizesGCN3>;
216
247
def : HipTargetInfo<"amd_gpu_gfx900", HipMinAspects, HipSubgroupSizesGCN5>;
217
248
def : HipTargetInfo<"amd_gpu_gfx902", HipMinAspects, HipSubgroupSizesGCN5>;
218
249
def : HipTargetInfo<"amd_gpu_gfx904", HipMinAspects, HipSubgroupSizesGCN5>;
219
- def : HipTargetInfo<"amd_gpu_gfx906", HipMinAspects, HipSubgroupSizesGCN5>;
220
- def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
221
250
def : HipTargetInfo<"amd_gpu_gfx909", HipMinAspects, HipSubgroupSizesGCN5>;
222
- def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
223
- def : HipTargetInfo<"amd_gpu_gfx90c", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesGCN5>;
224
- def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
225
- def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // TBA
226
- def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // TBA
227
- def : HipTargetInfo<"amd_gpu_gfx1010", HipMinAspects, HipSubgroupSizesRDNA>;
228
- def : HipTargetInfo<"amd_gpu_gfx1011", HipMinAspects, HipSubgroupSizesRDNA>;
229
- def : HipTargetInfo<"amd_gpu_gfx1012", HipMinAspects, HipSubgroupSizesRDNA>;
230
- def : HipTargetInfo<"amd_gpu_gfx1013", HipMinAspects, HipSubgroupSizesRDNA>;
231
- def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesRDNA>;
232
- def : HipTargetInfo<"amd_gpu_gfx1031", HipMinAspects, HipSubgroupSizesRDNA>;
233
- def : HipTargetInfo<"amd_gpu_gfx1032", HipMinAspects, HipSubgroupSizesRDNA>;
234
- def : HipTargetInfo<"amd_gpu_gfx1033", HipMinAspects, HipSubgroupSizesRDNA>;
235
- def : HipTargetInfo<"amd_gpu_gfx1034", HipMinAspects, HipSubgroupSizesRDNA>;
236
- def : HipTargetInfo<"amd_gpu_gfx1035", HipMinAspects, HipSubgroupSizesRDNA>;
237
- def : HipTargetInfo<"amd_gpu_gfx1036", HipMinAspects, HipSubgroupSizesRDNA>;
238
- def : HipTargetInfo<"amd_gpu_gfx1100", HipMinAspects, HipSubgroupSizesRDNA>;
239
- def : HipTargetInfo<"amd_gpu_gfx1101", HipMinAspects, HipSubgroupSizesRDNA>;
240
- def : HipTargetInfo<"amd_gpu_gfx1102", HipMinAspects, HipSubgroupSizesRDNA>;
241
- def : HipTargetInfo<"amd_gpu_gfx1103", HipMinAspects, HipSubgroupSizesRDNA>;
242
- def : HipTargetInfo<"amd_gpu_gfx1150", HipMinAspects, HipSubgroupSizesRDNA>;
243
- def : HipTargetInfo<"amd_gpu_gfx1151", HipMinAspects, HipSubgroupSizesRDNA>;
244
- def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // TBA
245
- def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // TBA
251
+ def : HipTargetInfo<"amd_gpu_gfx90c", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesGCN5>;
252
+ def : HipTargetInfo<"amd_gpu_gfx1010", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
253
+ def : HipTargetInfo<"amd_gpu_gfx1011", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
254
+ def : HipTargetInfo<"amd_gpu_gfx1012", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
255
+ def : HipTargetInfo<"amd_gpu_gfx1013", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
256
+ def : HipTargetInfo<"amd_gpu_gfx1031", !listconcat(!listremove(HipMinAspects, [AspectExt_intel_free_memory]), AllUSMAspects),
257
+ HipSubgroupSizesRDNA>;
258
+ def : HipTargetInfo<"amd_gpu_gfx1032", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
259
+ def : HipTargetInfo<"amd_gpu_gfx1033", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
260
+ def : HipTargetInfo<"amd_gpu_gfx1034", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
261
+ def : HipTargetInfo<"amd_gpu_gfx1035", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
262
+ def : HipTargetInfo<"amd_gpu_gfx1036", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
263
+ def : HipTargetInfo<"amd_gpu_gfx1101", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
264
+ def : HipTargetInfo<"amd_gpu_gfx1102", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
265
+ def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
266
+ def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
267
+ def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
268
+ // TBA
269
+ def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // CDNA 3
270
+ def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // CDNA 3
271
+ def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4
272
+ def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4
0 commit comments