Skip to content

[NFCI][SYCL] Refactor device selection in platform_impl.cpp #12288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 5, 2024
Merged
267 changes: 113 additions & 154 deletions sycl/source/detail/platform_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,59 +266,33 @@ std::vector<int> platform_impl::filterDeviceFilter(
MPlugin->call<PiApiKind::piDeviceGetInfo>(
Device, PI_DEVICE_INFO_TYPE, sizeof(sycl::detail::pi::PiDeviceType),
&PiDevType, nullptr);
// Assumption here is that there is 1-to-1 mapping between PiDevType and
// Sycl device type for GPU, CPU, and ACC.
info::device_type DeviceType = pi::cast<info::device_type>(PiDevType);

for (const FilterT &Filter : FilterList->get()) {
backend FilterBackend = Filter.Backend.value_or(backend::all);
// First, match the backend entry
if (FilterBackend == Backend || FilterBackend == backend::all) {
info::device_type FilterDevType =
Filter.DeviceType.value_or(info::device_type::all);
// Next, match the device_type entry
if (FilterDevType == info::device_type::all) {
// Last, match the device_num entry
if (!Filter.DeviceNum || DeviceNum == Filter.DeviceNum.value()) {
if constexpr (is_ods_target) { // dealing with ODS filters
if (!Blacklist[DeviceNum]) { // ensure it is not blacklisted
if (!Filter.IsNegativeTarget) { // is filter positive?
PiDevices[InsertIDx++] = Device;
original_indices.push_back(DeviceNum);
} else {
// Filter is negative and the device matches the filter so
// blacklist the device.
Blacklist[DeviceNum] = true;
}
}
} else { // dealing with SYCL_DEVICE_FILTER
PiDevices[InsertIDx++] = Device;
original_indices.push_back(DeviceNum);
}
break;
}

} else if (FilterDevType == DeviceType) {
if (!Filter.DeviceNum || DeviceNum == Filter.DeviceNum.value()) {
if constexpr (is_ods_target) {
if (!Blacklist[DeviceNum]) {
if (!Filter.IsNegativeTarget) {
PiDevices[InsertIDx++] = Device;
original_indices.push_back(DeviceNum);
} else {
// Filter is negative and the device matches the filter so
// blacklist the device.
Blacklist[DeviceNum] = true;
}
}
} else {
PiDevices[InsertIDx++] = Device;
original_indices.push_back(DeviceNum);
}
break;
}
// First, match the backend entry.
if (FilterBackend != Backend && FilterBackend != backend::all)
continue;

// Match the device_num entry.
if (Filter.DeviceNum && DeviceNum != Filter.DeviceNum.value())
continue;

if constexpr (is_ods_target) {
// Dealing with ONEAPI_DEVICE_SELECTOR - check for negative filters.
if (Blacklist[DeviceNum]) // already blacklisted.
break;

if (Filter.IsNegativeTarget) {
// Filter is negative and the device matches the filter so
// blacklist the device now.
Blacklist[DeviceNum] = true;
break;
}
}

PiDevices[InsertIDx++] = Device;
original_indices.push_back(DeviceNum);
break;
}
DeviceNum++;
}
Expand Down Expand Up @@ -392,116 +366,101 @@ static std::vector<device> amendDeviceAndSubDevices(
bool deviceAdded = false;
for (ods_target target : OdsTargetList->get()) {
backend TargetBackend = target.Backend.value_or(backend::all);
if (PlatformBackend == TargetBackend || TargetBackend == backend::all) {
bool deviceMatch = target.HasDeviceWildCard; // opencl:*
if (target.DeviceType) { // opencl:gpu
deviceMatch = ((target.DeviceType == info::device_type::all) ||
(dev.get_info<info::device::device_type>() ==
target.DeviceType));

} else if (target.DeviceNum) { // opencl:0
deviceMatch = (target.DeviceNum.value() == original_indices[i]);
if (PlatformBackend != TargetBackend && TargetBackend != backend::all)
continue;

bool deviceMatch = target.HasDeviceWildCard; // opencl:*
if (target.DeviceType) { // opencl:gpu
deviceMatch =
((target.DeviceType == info::device_type::all) ||
(dev.get_info<info::device::device_type>() == target.DeviceType));

} else if (target.DeviceNum) { // opencl:0
deviceMatch = (target.DeviceNum.value() == original_indices[i]);
}

if (!deviceMatch)
continue;

// Top level matches. Do we add it, or subdevices, or sub-sub-devices?
bool wantSubDevice = target.SubDeviceNum || target.HasSubDeviceWildCard;
bool supportsSubPartitioning =
(supportsPartitionProperty(dev, partitionProperty) &&
supportsAffinityDomain(dev, partitionProperty, affinityDomain));
bool wantSubSubDevice =
target.SubSubDeviceNum || target.HasSubSubDeviceWildCard;

if (!wantSubDevice) {
// -- Add top level device only.
if (!deviceAdded) {
FinalResult.push_back(dev);
deviceAdded = true;
}
continue;
}

if (!supportsSubPartitioning) {
if (target.DeviceNum ||
(target.DeviceType &&
(target.DeviceType.value() != info::device_type::all))) {
// This device was specifically requested and yet is not
// partitionable.
std::cout << "device is not partitionable: " << target << std::endl;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand these and other prints are actually diagnosing errors (e.g. trying to partition a device that is not partitionable). Is my understanding correct? If so, I wonder if just printing and keep going as if nothing happened is good enough as a diagnostic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They were here before me, any changes in the behavior should go into a separate PR :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I saw they were already there in the old version, and I agree that it should be a separate PR if we change that, but I wanted to bring up the debate. What do you think? Should we keep diagnostics as they are now? Or should we stop if we find an error?

I'm approving this PR since we agreed this should go into a separate PR, if we do anything about it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I honestly would expect an exception thrown, but maybe @cperkinsintel had his reasons to implement it this way (I assume it was added as part of his major work in this area).

}
continue;
}

if (deviceMatch) {
// Top level matches. Do we add it, or subdevices, or sub-sub-devices?
bool wantSubDevice =
target.SubDeviceNum || target.HasSubDeviceWildCard;
bool supportsSubPartitioning =
(supportsPartitionProperty(dev, partitionProperty) &&
supportsAffinityDomain(dev, partitionProperty, affinityDomain));
bool wantSubSubDevice =
target.SubSubDeviceNum || target.HasSubSubDeviceWildCard;

// -- Add top level device.
if (!wantSubDevice) {
if (!deviceAdded) {
FinalResult.push_back(dev);
deviceAdded = true;
}
} else {
if (!supportsSubPartitioning) {
if (target.DeviceNum ||
(target.DeviceType &&
(target.DeviceType.value() != info::device_type::all))) {
// This device was specifically requested and yet is not
// partitionable.
std::cout << "device is not partitionable: " << target
<< std::endl;
}
continue;
}
// -- Add sub sub device.
if (wantSubSubDevice) {

auto subDevicesToPartition =
dev.create_sub_devices<partitionProperty>(affinityDomain);
if (target.SubDeviceNum) {
if (subDevicesToPartition.size() >
target.SubDeviceNum.value()) {
subDevicesToPartition[0] =
subDevicesToPartition[target.SubDeviceNum.value()];
subDevicesToPartition.resize(1);
} else {
std::cout << "subdevice index out of bounds: " << target
<< std::endl;
continue;
}
}
for (device subDev : subDevicesToPartition) {
bool supportsSubSubPartitioning =
(supportsPartitionProperty(subDev, partitionProperty) &&
supportsAffinityDomain(subDev, partitionProperty,
affinityDomain));
if (!supportsSubSubPartitioning) {
if (target.SubDeviceNum) {
// Parent subdevice was specifically requested, yet is not
// partitionable.
std::cout << "sub-device is not partitionable: " << target
<< std::endl;
}
continue;
}
// Allright, lets get them sub-sub-devices.
auto subSubDevices =
subDev.create_sub_devices<partitionProperty>(
affinityDomain);
if (target.HasSubSubDeviceWildCard) {
FinalResult.insert(FinalResult.end(), subSubDevices.begin(),
subSubDevices.end());
} else {
if (subSubDevices.size() > target.SubSubDeviceNum.value()) {
FinalResult.push_back(
subSubDevices[target.SubSubDeviceNum.value()]);
} else {
std::cout
<< "sub-sub-device index out of bounds: " << target
<< std::endl;
}
}
}
} else if (wantSubDevice) {
auto subDevices = dev.create_sub_devices<
info::partition_property::partition_by_affinity_domain>(
affinityDomain);
if (target.HasSubDeviceWildCard) {
FinalResult.insert(FinalResult.end(), subDevices.begin(),
subDevices.end());
} else {
if (subDevices.size() > target.SubDeviceNum.value()) {
FinalResult.push_back(
subDevices[target.SubDeviceNum.value()]);
} else {
std::cout << "subdevice index out of bounds: " << target
<< std::endl;
}
}
}
auto subDevices = dev.create_sub_devices<
info::partition_property::partition_by_affinity_domain>(
affinityDomain);
if (target.SubDeviceNum) {
if (subDevices.size() <= target.SubDeviceNum.value()) {
std::cout << "subdevice index out of bounds: " << target << std::endl;
continue;
}
subDevices[0] = subDevices[target.SubDeviceNum.value()];
subDevices.resize(1);
}

if (!wantSubSubDevice) {
// -- Add sub device(s) only.
FinalResult.insert(FinalResult.end(), subDevices.begin(),
subDevices.end());
continue;
}

// -- Add sub sub device(s).
for (device subDev : subDevices) {
bool supportsSubSubPartitioning =
(supportsPartitionProperty(subDev, partitionProperty) &&
supportsAffinityDomain(subDev, partitionProperty, affinityDomain));
if (!supportsSubSubPartitioning) {
if (target.SubDeviceNum) {
// Parent subdevice was specifically requested, yet is not
// partitionable.
std::cout << "sub-device is not partitionable: " << target
<< std::endl;
}
} // /if deviceMatch
continue;
}

// Allright, lets get them sub-sub-devices.
auto subSubDevices =
subDev.create_sub_devices<partitionProperty>(affinityDomain);
if (target.SubSubDeviceNum) {
if (subSubDevices.size() <= target.SubSubDeviceNum.value()) {
std::cout << "sub-sub-device index out of bounds: " << target
<< std::endl;
continue;
}
subSubDevices[0] = subSubDevices[target.SubSubDeviceNum.value()];
subSubDevices.resize(1);
}
FinalResult.insert(FinalResult.end(), subSubDevices.begin(),
subSubDevices.end());
}
} // /for
} // /for
}
}
return FinalResult;
}

Expand Down