Skip to content

Commit 25785a2

Browse files
committed
[SYCL][Driver] Refactor device config parsing to better match HIP and CUDA targets
This commit also moves the parsing into SYCLActionBuilder, so that it can use the device architecture bound to the offloading toolchain to look up the target in the DeviceConfigFile's TargetTable.
1 parent 4c7baa7 commit 25785a2

File tree

6 files changed

+224
-95
lines changed

6 files changed

+224
-95
lines changed

clang/include/clang/Driver/Driver.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -973,12 +973,12 @@ class Driver {
973973
return SYCLUniqueIDList[FileName];
974974
}
975975

976-
/// Reads device config file to find information about the SYCL targets in
977-
/// UniqueSYCLTriplesVec, and defines device traits macros accordingly.
978-
void populateSYCLDeviceTraitsMacrosArgs(
979-
const llvm::opt::ArgList &Args,
980-
const llvm::SmallVector<llvm::Triple, 4> &UniqueSYCLTriplesVec);
981-
976+
/// addSYCLDeviceTraitsMacroArg - Add the given macro to the vector of args to be
977+
/// added to the device compilation step.
978+
void addSYCLDeviceTraitsMacroArg(const llvm::opt::ArgList &Args,
979+
StringRef Macro) const {
980+
SYCLDeviceTraitsMacrosArgs.push_back(Args.MakeArgString(Macro));
981+
}
982982
llvm::opt::ArgStringList getDeviceTraitsMacrosArgs() const {
983983
return SYCLDeviceTraitsMacrosArgs;
984984
}

clang/lib/Driver/Driver.cpp

Lines changed: 100 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,9 +1310,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
13101310
}
13111311
}
13121312
}
1313-
// Define macros associated with `any_device_has/all_devices_have` according
1314-
// to the aspects defined in the DeviceConfigFile for the SYCL targets.
1315-
populateSYCLDeviceTraitsMacrosArgs(C.getInputArgs(), UniqueSYCLTriplesVec);
13161313
// We'll need to use the SYCL and host triples as the key into
13171314
// getOffloadingDeviceToolChain, because the device toolchains we're
13181315
// going to create will depend on both.
@@ -6261,6 +6258,101 @@ class OffloadingActionBuilder final {
62616258
return FinalDeviceSections;
62626259
}
62636260

6261+
/// Reads device config file to find information about the SYCL targets in
6262+
/// `Targets`, and defines device traits macros accordingly.
6263+
void populateSYCLDeviceTraitsMacrosArgs(
6264+
Compilation &C, DerivedArgList &Args,
6265+
SmallVector<DeviceTargetInfo, 4> &Targets) const {
6266+
if (Targets.empty())
6267+
return;
6268+
6269+
const auto &TargetTable = DeviceConfigFile::TargetTable;
6270+
std::map<StringRef, unsigned int> AllDevicesHave;
6271+
std::map<StringRef, bool> AnyDeviceHas;
6272+
bool AnyDeviceHasAnyAspect = false;
6273+
unsigned int ValidTargets = 0;
6274+
for (const auto &[TC, BoundArch] : Targets) {
6275+
assert(TC && "Invalid SYCL Offload Toolchain");
6276+
// Look up the target by its bound device arch. If no arch is bound, search
6277+
// for the whole triple string first, then for just its 'ArchName'.
6278+
auto TargetIt = TargetTable.end();
6279+
const llvm::Triple &TargetTriple = TC->getTriple();
6280+
const StringRef TargetArch{BoundArch};
6281+
if (!TargetArch.empty()) {
6282+
TargetIt = llvm::find_if(TargetTable, [&](const auto &Value) {
6283+
using namespace tools::SYCL;
6284+
StringRef Device{Value.first};
6285+
if (Device.consume_front(gen::AmdGPU))
6286+
return TargetArch.equals(Device) && TargetTriple.isAMDGCN();
6287+
if (Device.consume_front(gen::NvidiaGPU))
6288+
return TargetArch.equals(Device) && TargetTriple.isNVPTX();
6289+
if (Device.consume_front(gen::IntelGPU))
6290+
return TargetArch.equals(Device) && TargetTriple.isSPIRAOT();
6291+
return TargetArch.equals(Device) && isValidSYCLTriple(TargetTriple);
6292+
});
6293+
} else {
6294+
TargetIt = TargetTable.find(TargetTriple.str());
6295+
if (TargetIt == TargetTable.end())
6296+
TargetIt = TargetTable.find(TargetTriple.getArchName().str());
6297+
}
6298+
6299+
if (TargetIt != TargetTable.end()) {
6300+
const DeviceConfigFile::TargetInfo &Info = (*TargetIt).second;
6301+
++ValidTargets;
6302+
const auto &AspectList = Info.aspects;
6303+
const auto &MaySupportOtherAspects = Info.maySupportOtherAspects;
6304+
if (!AnyDeviceHasAnyAspect)
6305+
AnyDeviceHasAnyAspect = MaySupportOtherAspects;
6306+
for (const auto &aspect : AspectList) {
6307+
// If target has an entry in the config file, the set of aspects
6308+
// supported by all devices supporting the target is 'AspectList'.
6309+
// If there's no entry, such set is empty.
6310+
const auto &AspectIt = AllDevicesHave.find(aspect);
6311+
if (AspectIt != AllDevicesHave.end())
6312+
++AllDevicesHave[aspect];
6313+
else
6314+
AllDevicesHave[aspect] = 1;
6315+
// If target has an entry in the config file AND
6316+
// 'MaySupportOtherAspects' is false, the set of aspects supported
6317+
// by any device supporting the target is 'AspectList'. If there's
6318+
// no entry OR 'MaySupportOtherAspects' is true, such set contains
6319+
// all the aspects.
6320+
AnyDeviceHas[aspect] = true;
6321+
}
6322+
}
6323+
}
6324+
6325+
// If there's no entry for the target in the device config file, the set
6326+
// of aspects supported by any device supporting the target contains all
6327+
// the aspects.
6328+
if (ValidTargets == 0)
6329+
AnyDeviceHasAnyAspect = true;
6330+
6331+
const Driver &D = C.getDriver();
6332+
if (AnyDeviceHasAnyAspect) {
6333+
// There exists some target that supports any given aspect.
6334+
constexpr static StringRef MacroAnyDeviceAnyAspect{
6335+
"-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1"};
6336+
D.addSYCLDeviceTraitsMacroArg(Args, MacroAnyDeviceAnyAspect);
6337+
} else {
6338+
// Some of the aspects are not supported at all by any of the targets.
6339+
// Thus, we need to define individual macros for each supported aspect.
6340+
for (const auto &[TargetKey, SupportedTarget] : AnyDeviceHas) {
6341+
assert(SupportedTarget);
6342+
const SmallString<64> MacroAnyDevice{
6343+
{"-D__SYCL_ANY_DEVICE_HAS_", TargetKey, "__=1"}};
6344+
D.addSYCLDeviceTraitsMacroArg(Args, MacroAnyDevice);
6345+
}
6346+
}
6347+
for (const auto &[TargetKey, SupportedTargets] : AllDevicesHave) {
6348+
if (SupportedTargets != ValidTargets)
6349+
continue;
6350+
const SmallString<64> MacroAllDevices{
6351+
{"-D__SYCL_ALL_DEVICES_HAVE_", TargetKey, "__=1"}};
6352+
D.addSYCLDeviceTraitsMacroArg(Args, MacroAllDevices);
6353+
}
6354+
}
6355+
62646356
bool initialize() override {
62656357
using namespace tools::SYCL;
62666358
// Get the SYCL toolchains. If we don't get any, the action builder will
@@ -6520,6 +6612,11 @@ class OffloadingActionBuilder final {
65206612
checkForOffloadMismatch(C, Args, SYCLTargetInfoList);
65216613
checkForMisusedAddDefaultSpecConstsImageFlag(C, Args, SYCLTargetInfoList);
65226614

6615+
// Define macros associated with `any_device_has/all_devices_have`
6616+
// according to the aspects defined in the DeviceConfigFile for the SYCL
6617+
// targets.
6618+
populateSYCLDeviceTraitsMacrosArgs(C, Args, SYCLTargetInfoList);
6619+
65236620
DeviceLinkerInputs.resize(SYCLTargetInfoList.size());
65246621
return false;
65256622
}
@@ -10410,92 +10507,6 @@ llvm::Error driver::expandResponseFiles(SmallVectorImpl<const char *> &Args,
1041010507
return llvm::Error::success();
1041110508
}
1041210509

10413-
void Driver::populateSYCLDeviceTraitsMacrosArgs(
10414-
const llvm::opt::ArgList &Args,
10415-
const llvm::SmallVector<llvm::Triple, 4> &UniqueSYCLTriplesVec) {
10416-
const auto &TargetTable = DeviceConfigFile::TargetTable;
10417-
std::map<StringRef, unsigned int> AllDevicesHave;
10418-
std::map<StringRef, bool> AnyDeviceHas;
10419-
bool AnyDeviceHasAnyAspect = false;
10420-
unsigned int ValidTargets = 0;
10421-
for (const auto &TargetTriple : UniqueSYCLTriplesVec) {
10422-
// Try and find the whole triple, if there's no match, remove parts of the
10423-
// triple from the end to find partial matches.
10424-
auto TargetTripleStr = TargetTriple.getTriple();
10425-
bool Found = false;
10426-
bool EmptyTriple = false;
10427-
auto TripleIt = TargetTable.end();
10428-
while (!Found && !EmptyTriple) {
10429-
TripleIt = TargetTable.find(TargetTripleStr);
10430-
Found = (TripleIt != TargetTable.end());
10431-
if (!Found) {
10432-
auto Pos = TargetTripleStr.find_last_of('-');
10433-
EmptyTriple = (Pos == std::string::npos);
10434-
TargetTripleStr =
10435-
EmptyTriple ? TargetTripleStr : TargetTripleStr.substr(0, Pos);
10436-
}
10437-
}
10438-
if (Found) {
10439-
assert(TripleIt != TargetTable.end());
10440-
const auto &TargetInfo = (*TripleIt).second;
10441-
++ValidTargets;
10442-
const auto &AspectList = TargetInfo.aspects;
10443-
const auto &MaySupportOtherAspects = TargetInfo.maySupportOtherAspects;
10444-
if (!AnyDeviceHasAnyAspect)
10445-
AnyDeviceHasAnyAspect = MaySupportOtherAspects;
10446-
for (const auto &aspect : AspectList) {
10447-
// If target has an entry in the config file, the set of aspects
10448-
// supported by all devices supporting the target is 'AspectList'. If
10449-
// there's no entry, such set is empty.
10450-
const auto &AspectIt = AllDevicesHave.find(aspect);
10451-
if (AspectIt != AllDevicesHave.end())
10452-
++AllDevicesHave[aspect];
10453-
else
10454-
AllDevicesHave[aspect] = 1;
10455-
// If target has an entry in the config file AND
10456-
// 'MaySupportOtherAspects' is false, the set of aspects supported by
10457-
// any device supporting the target is 'AspectList'. If there's no
10458-
// entry OR 'MaySupportOtherAspects' is true, such set contains all
10459-
// the aspects.
10460-
AnyDeviceHas[aspect] = true;
10461-
}
10462-
}
10463-
}
10464-
10465-
if (ValidTargets == 0) {
10466-
// If there's no entry for the target in the device config file, the set
10467-
// of aspects supported by any device supporting the target contains all
10468-
// the aspects.
10469-
AnyDeviceHasAnyAspect = true;
10470-
}
10471-
10472-
if (AnyDeviceHasAnyAspect) {
10473-
// There exists some target that supports any given aspect.
10474-
SmallString<64> MacroAnyDeviceAnyAspect(
10475-
"-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1");
10476-
SYCLDeviceTraitsMacrosArgs.push_back(
10477-
Args.MakeArgString(MacroAnyDeviceAnyAspect));
10478-
} else {
10479-
// Some of the aspects are not supported at all by any of the targets.
10480-
// Thus, we need to define individual macros for each supported aspect.
10481-
for (const auto &[TargetKey, SupportedTarget] : AnyDeviceHas) {
10482-
assert(SupportedTarget);
10483-
SmallString<64> MacroAnyDevice("-D__SYCL_ANY_DEVICE_HAS_");
10484-
MacroAnyDevice += TargetKey;
10485-
MacroAnyDevice += "__=1";
10486-
SYCLDeviceTraitsMacrosArgs.push_back(Args.MakeArgString(MacroAnyDevice));
10487-
}
10488-
}
10489-
for (const auto &[TargetKey, SupportedTargets] : AllDevicesHave) {
10490-
if (SupportedTargets != ValidTargets)
10491-
continue;
10492-
SmallString<64> MacroAllDevices("-D__SYCL_ALL_DEVICES_HAVE_");
10493-
MacroAllDevices += TargetKey;
10494-
MacroAllDevices += "__=1";
10495-
SYCLDeviceTraitsMacrosArgs.push_back(Args.MakeArgString(MacroAllDevices));
10496-
}
10497-
}
10498-
1049910510
static const char *GetStableCStr(llvm::StringSet<> &SavedStrings, StringRef S) {
1050010511
return SavedStrings.insert(S).first->getKeyData();
1050110512
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// REQUIRES: amdgpu-registered-target
2+
3+
// Check device traits macros are defined if sycl is enabled:
4+
// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 -### %s 2>&1 \
5+
// RUN: | FileCheck -check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA %s
6+
// CHECK-SYCL-AMDGCN-AMD-AMDHSA-NOT: "-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1"
7+
// CHECK-SYCL-AMDGCN-AMD-AMDHSA: "-D__SYCL_ANY_DEVICE_HAS_{{.*}}__=1"
8+
// CHECK-SYCL-AMDGCN-AMD-AMDHSA: "{{(-D__SYCL_ALL_DEVICES_HAVE_)?}}{{.*}}{{(__=1)?}}"
9+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// REQUIRES: nvptx-registered-target
2+
3+
// Check device traits macros are defined if sycl is enabled:
4+
// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -### %s 2>&1 \
5+
// RUN: | FileCheck -check-prefix=CHECK-SYCL-NVPTX-NVIDIA-CUDA %s
6+
// CHECK-SYCL-NVPTX-NVIDIA-CUDA-COUNT-2: "-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1"
7+
// CHECK-SYCL-NVPTX-NVIDIA-CUDA: "-D__SYCL_ANY_DEVICE_HAS_{{.*}}__=1"
8+
// CHECK-SYCL-NVPTX-NVIDIA-CUDA: "{{(-D__SYCL_ALL_DEVICES_HAVE_)?}}{{.*}}{{(__=1)?}}"
9+

clang/test/Driver/sycl-device-traits-macros.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,12 @@
1919
// RUN: %clang -fsycl -fsycl-targets=spir64,spir64_gen -### %s 2>&1 \
2020
// RUN: | FileCheck -check-prefix=CHECK-SYCL-TARGETS %s
2121
// CHECK-SYCL-TARGETS-COUNT-3: "-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1"
22+
23+
/// Check device traits macros are defined if sycl is enabled:
24+
/// In this case, no specific SYCL targets are passed and `-fsycl-device-only`
24+
/// is given without `-fsycl`, so only device compilation happens: the only SYCL
25+
/// target is the default spir64, and there is no host target. Hence, we expect
26+
/// only one occurrence of the macro definition (for the device target).
28+
// RUN: %clang -fsycl-device-only -### %s 2>&1 \
29+
// RUN: | FileCheck -check-prefix=CHECK-SYCL-DEVICE-ONLY %s
30+
// CHECK-SYCL-DEVICE-ONLY-COUNT-1: "-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1"

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,94 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>;
152152
def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>;
153153
def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>;
154154
def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>;
155+
156+
//
157+
// CUDA / NVPTX device aspects
158+
//
159+
160+
// Specialize the TargetInfo class for CUDA:
161+
// 'maySupportOtherAspects' is set to 1 because any Cuda device with the same
162+
// or newer capabilities will support the aspects of the least capable device.
163+
class CudaTargetInfo<string targetName, list<Aspect> aspectList, int subGroupSize = 32, string toolchain = "", string options = "">
164+
: TargetInfo<targetName, aspectList, [subGroupSize], toolchain, options, /*maySupportOtherAspects*/1> {
165+
assert !eq(subGroupSize, 32), "sub-group size for Cuda must be equal to 32 and not " # subGroupSize # ".";
166+
}
167+
168+
defvar CudaMinAspects = [AspectGpu, AspectFp16, AspectFp64, AspectQueue_profiling,
169+
AspectUsm_device_allocations, AspectUsm_host_allocations,
170+
AspectUsm_shared_allocations, AspectAtomic64];
171+
// Bindless images aspects are partially supported on CUDA and disabled by default at the moment.
172+
defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import];
173+
174+
def : CudaTargetInfo<"nvidia_gpu_sm_50", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
175+
def : CudaTargetInfo<"nvidia_gpu_sm_52", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
176+
def : CudaTargetInfo<"nvidia_gpu_sm_53", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
177+
def : CudaTargetInfo<"nvidia_gpu_sm_60", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
178+
def : CudaTargetInfo<"nvidia_gpu_sm_61", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
179+
def : CudaTargetInfo<"nvidia_gpu_sm_62", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
180+
def : CudaTargetInfo<"nvidia_gpu_sm_70", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
181+
def : CudaTargetInfo<"nvidia_gpu_sm_72", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
182+
def : CudaTargetInfo<"nvidia_gpu_sm_75", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
183+
def : CudaTargetInfo<"nvidia_gpu_sm_80", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
184+
def : CudaTargetInfo<"nvidia_gpu_sm_86", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
185+
def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
186+
def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
187+
def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects)>;
188+
189+
//
190+
// HIP / AMDGPU device aspects
191+
//
192+
193+
// Specialize the TargetInfo class for HIP:
194+
// 'maySupportOtherAspects' is set to 0 because there is no guarantee that any
195+
// aspect support is compatible between older and newer gfx architectures.
196+
class HipTargetInfo<string targetName, list<Aspect> aspectList, list<int> subGroupSizesList, string toolchain = "", string options = "">
197+
: TargetInfo<targetName, aspectList, subGroupSizesList, toolchain, options, /*maySupportOtherAspects*/0>;
198+
199+
// DPCPP does not support AMD targets prior to the gfx7 (GCN2) family.
200+
defvar HipSubgroupSizesGCN2 = [16]; // gfx7
201+
defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes
202+
defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906, gfx90c, GCN 5th gen is also known as Vega
203+
defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available)
204+
defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a (encapsulates CDNA1..2)
205+
206+
defvar HipMinAspects = [AspectGpu, AspectQueue_profiling, AspectUsm_device_allocations, AspectUsm_host_allocations];
207+
208+
def : HipTargetInfo<"amd_gpu_gfx700", HipMinAspects, HipSubgroupSizesGCN2>;
209+
def : HipTargetInfo<"amd_gpu_gfx701", HipMinAspects, HipSubgroupSizesGCN2>;
210+
def : HipTargetInfo<"amd_gpu_gfx702", HipMinAspects, HipSubgroupSizesGCN2>;
211+
def : HipTargetInfo<"amd_gpu_gfx801", HipMinAspects, HipSubgroupSizesGCN3>;
212+
def : HipTargetInfo<"amd_gpu_gfx802", HipMinAspects, HipSubgroupSizesGCN3>;
213+
def : HipTargetInfo<"amd_gpu_gfx803", HipMinAspects, HipSubgroupSizesGCN3>;
214+
def : HipTargetInfo<"amd_gpu_gfx805", HipMinAspects, HipSubgroupSizesGCN3>;
215+
def : HipTargetInfo<"amd_gpu_gfx810", HipMinAspects, HipSubgroupSizesGCN3>;
216+
def : HipTargetInfo<"amd_gpu_gfx900", HipMinAspects, HipSubgroupSizesGCN5>;
217+
def : HipTargetInfo<"amd_gpu_gfx902", HipMinAspects, HipSubgroupSizesGCN5>;
218+
def : HipTargetInfo<"amd_gpu_gfx904", HipMinAspects, HipSubgroupSizesGCN5>;
219+
def : HipTargetInfo<"amd_gpu_gfx906", HipMinAspects, HipSubgroupSizesGCN5>;
220+
def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
221+
def : HipTargetInfo<"amd_gpu_gfx909", HipMinAspects, HipSubgroupSizesGCN5>;
222+
def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
223+
def : HipTargetInfo<"amd_gpu_gfx90c", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesGCN5>;
224+
def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesCDNA>;
225+
def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // TBA
226+
def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // TBA
227+
def : HipTargetInfo<"amd_gpu_gfx1010", HipMinAspects, HipSubgroupSizesRDNA>;
228+
def : HipTargetInfo<"amd_gpu_gfx1011", HipMinAspects, HipSubgroupSizesRDNA>;
229+
def : HipTargetInfo<"amd_gpu_gfx1012", HipMinAspects, HipSubgroupSizesRDNA>;
230+
def : HipTargetInfo<"amd_gpu_gfx1013", HipMinAspects, HipSubgroupSizesRDNA>;
231+
def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, [AspectUsm_shared_allocations]), HipSubgroupSizesRDNA>;
232+
def : HipTargetInfo<"amd_gpu_gfx1031", HipMinAspects, HipSubgroupSizesRDNA>;
233+
def : HipTargetInfo<"amd_gpu_gfx1032", HipMinAspects, HipSubgroupSizesRDNA>;
234+
def : HipTargetInfo<"amd_gpu_gfx1033", HipMinAspects, HipSubgroupSizesRDNA>;
235+
def : HipTargetInfo<"amd_gpu_gfx1034", HipMinAspects, HipSubgroupSizesRDNA>;
236+
def : HipTargetInfo<"amd_gpu_gfx1035", HipMinAspects, HipSubgroupSizesRDNA>;
237+
def : HipTargetInfo<"amd_gpu_gfx1036", HipMinAspects, HipSubgroupSizesRDNA>;
238+
def : HipTargetInfo<"amd_gpu_gfx1100", HipMinAspects, HipSubgroupSizesRDNA>;
239+
def : HipTargetInfo<"amd_gpu_gfx1101", HipMinAspects, HipSubgroupSizesRDNA>;
240+
def : HipTargetInfo<"amd_gpu_gfx1102", HipMinAspects, HipSubgroupSizesRDNA>;
241+
def : HipTargetInfo<"amd_gpu_gfx1103", HipMinAspects, HipSubgroupSizesRDNA>;
242+
def : HipTargetInfo<"amd_gpu_gfx1150", HipMinAspects, HipSubgroupSizesRDNA>;
243+
def : HipTargetInfo<"amd_gpu_gfx1151", HipMinAspects, HipSubgroupSizesRDNA>;
244+
def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // TBA
245+
def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // TBA

0 commit comments

Comments
 (0)