@@ -630,61 +630,8 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram(
630
630
getDeviceImage (KSId, Context, Device, JITCompilationIsRequired);
631
631
632
632
// Check that device supports all aspects used by the kernel
633
- const RTDeviceBinaryImage::PropertyRange &ARange =
634
- Img.getDeviceRequirements ();
635
-
636
- #define __SYCL_ASPECT (ASPECT, ID ) \
637
- case aspect::ASPECT: \
638
- return #ASPECT;
639
- #define __SYCL_ASPECT_DEPRECATED (ASPECT, ID, MESSAGE ) __SYCL_ASPECT(ASPECT, ID)
640
- // We don't need "case aspect::usm_allocator" here because it will duplicate
641
- // "case aspect::usm_system_allocations", therefore leave this macro empty
642
- #define __SYCL_ASPECT_DEPRECATED_ALIAS (ASPECT, ID, MESSAGE )
643
- auto getAspectNameStr = [](aspect AspectNum) -> std::string {
644
- switch (AspectNum) {
645
- #include < sycl/info/aspects.def>
646
- #include < sycl/info/aspects_deprecated.def>
647
- }
648
- throw sycl::exception (errc::kernel_not_supported,
649
- " Unknown aspect " +
650
- std::to_string (static_cast <unsigned >(AspectNum)));
651
- };
652
- #undef __SYCL_ASPECT_DEPRECATED_ALIAS
653
- #undef __SYCL_ASPECT_DEPRECATED
654
- #undef __SYCL_ASPECT
655
-
656
- for (RTDeviceBinaryImage::PropertyRange::ConstIterator It : ARange) {
657
- using namespace std ::literals;
658
- if ((*It)->Name == " aspects" sv) {
659
- ByteArray Aspects = DeviceBinaryProperty (*It).asByteArray ();
660
- // 8 because we need to skip 64-bits of size of the byte array
661
- Aspects.dropBytes (8 );
662
- while (!Aspects.empty ()) {
663
- auto Aspect = static_cast <aspect>(Aspects.consume <int >());
664
- if (!Dev->has (Aspect))
665
- throw sycl::exception (errc::kernel_not_supported,
666
- " Required aspect " + getAspectNameStr (Aspect) +
667
- " is not supported on the device" );
668
- }
669
- } else if ((*It)->Name == " reqd_sub_group_size" sv) {
670
- auto ReqdSubGroupSize = DeviceBinaryProperty (*It).asUint32 ();
671
- auto SupportedSubGroupSizes =
672
- Device.get_info <info::device::sub_group_sizes>();
673
-
674
- // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD,
675
- // as ESIMD images have a reqd-sub-group-size of 1, but currently
676
- // no backend currently includes 1 as a valid sub-group size.
677
- // This can be removed if backends add 1 as a valid sub-group size.
678
- if (!getUint32PropAsBool (Img, " isEsimdImage" ) &&
679
- std::none_of (SupportedSubGroupSizes.cbegin (),
680
- SupportedSubGroupSizes.cend (),
681
- [=](auto s) { return s == ReqdSubGroupSize; }))
682
- throw sycl::exception (errc::kernel_not_supported,
683
- " Sub-group size " +
684
- std::to_string (ReqdSubGroupSize) +
685
- " is not supported on the device" );
686
- }
687
- }
633
+ if (auto exception = checkDevSupportDeviceRequirements (Device, Img))
634
+ throw *exception;
688
635
689
636
auto BuildF = [this , &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts,
690
637
&LinkOpts, SpecConsts] {
@@ -2456,6 +2403,44 @@ ProgramManager::getOrCreateKernel(const context &Context,
2456
2403
2457
2404
bool doesDevSupportDeviceRequirements (const device &Dev,
2458
2405
const RTDeviceBinaryImage &Img) {
2406
+ return !checkDevSupportDeviceRequirements (Dev, Img).has_value ();
2407
+ }
2408
+
2409
+ static std::string getAspectNameStr (sycl::aspect AspectNum) {
2410
+ #define __SYCL_ASPECT (ASPECT, ID ) \
2411
+ case aspect::ASPECT: \
2412
+ return #ASPECT;
2413
+ #define __SYCL_ASPECT_DEPRECATED (ASPECT, ID, MESSAGE ) __SYCL_ASPECT(ASPECT, ID)
2414
+ // We don't need "case aspect::usm_allocator" here because it will duplicate
2415
+ // "case aspect::usm_system_allocations", therefore leave this macro empty
2416
+ #define __SYCL_ASPECT_DEPRECATED_ALIAS (ASPECT, ID, MESSAGE )
2417
+ switch (AspectNum) {
2418
+ #include < sycl/info/aspects.def>
2419
+ #include < sycl/info/aspects_deprecated.def>
2420
+ }
2421
+ throw sycl::exception (errc::kernel_not_supported,
2422
+ " Unknown aspect " +
2423
+ std::to_string (static_cast <unsigned >(AspectNum)));
2424
+ #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2425
+ #undef __SYCL_ASPECT_DEPRECATED
2426
+ #undef __SYCL_ASPECT
2427
+ }
2428
+
2429
+ // Check if the multiplication over unsigned integers overflows
2430
+ template <typename T>
2431
+ static std::enable_if_t <std::is_unsigned_v<T>, std::optional<T>>
2432
+ multiply_with_overflow_check (T x, T y) {
2433
+ if (y == 0 )
2434
+ return 0 ;
2435
+ if (x > std::numeric_limits<T>::max () / y)
2436
+ return {};
2437
+ else
2438
+ return x * y;
2439
+ }
2440
+
2441
+ std::optional<sycl::exception>
2442
+ checkDevSupportDeviceRequirements (const device &Dev,
2443
+ const RTDeviceBinaryImage &Img) {
2459
2444
auto getPropIt = [&Img](const std::string &PropName) {
2460
2445
const RTDeviceBinaryImage::PropertyRange &PropRange =
2461
2446
Img.getDeviceRequirements ();
@@ -2471,7 +2456,8 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
2471
2456
};
2472
2457
2473
2458
auto AspectsPropIt = getPropIt (" aspects" );
2474
- auto ReqdWGSizePropIt = getPropIt (" reqd_work_group_size" );
2459
+ auto ReqdWGSizeUint32TPropIt = getPropIt (" reqd_work_group_size" );
2460
+ auto ReqdWGSizeUint64TPropIt = getPropIt (" reqd_work_group_size_uint64_t" );
2475
2461
auto ReqdSubGroupSizePropIt = getPropIt (" reqd_sub_group_size" );
2476
2462
2477
2463
// Checking if device supports defined aspects
@@ -2483,28 +2469,54 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
2483
2469
while (!Aspects.empty ()) {
2484
2470
aspect Aspect = Aspects.consume <aspect>();
2485
2471
if (!Dev.has (Aspect))
2486
- return false ;
2472
+ return sycl::exception (errc::kernel_not_supported,
2473
+ " Required aspect " + getAspectNameStr (Aspect) +
2474
+ " is not supported on the device" );
2487
2475
}
2488
2476
}
2489
2477
2490
2478
// Checking if device supports defined required work group size
2491
- if (ReqdWGSizePropIt) {
2492
- ByteArray ReqdWGSize =
2493
- DeviceBinaryProperty (*(ReqdWGSizePropIt.value ())).asByteArray ();
2479
+ if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
2480
+ // / TODO: Before intel/llvm#10620, the reqd_work_group_size attribute
2481
+ // stores its values as uint32_t, but this needed to be expanded to
2482
+ // uint64_t. However, this change did not happen in ABI-breaking
2483
+ // window, so we attach the required work-group size as the
2484
+ // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
2485
+ // window, we can remove the logic for the 32 bit property.
2486
+ bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value ();
2487
+ auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
2488
+
2489
+ ByteArray ReqdWGSize = DeviceBinaryProperty (*(it.value ())).asByteArray ();
2494
2490
// Drop 8 bytes describing the size of the byte array.
2495
2491
ReqdWGSize.dropBytes (8 );
2496
- int ReqdWGSizeAllDimsTotal = 1 ;
2497
- std::vector<int > ReqdWGSizeVec;
2492
+ uint64_t ReqdWGSizeAllDimsTotal = 1 ;
2493
+ std::vector<uint64_t > ReqdWGSizeVec;
2498
2494
int Dims = 0 ;
2499
2495
while (!ReqdWGSize.empty ()) {
2500
- int SingleDimSize = ReqdWGSize.consume <int >();
2501
- ReqdWGSizeAllDimsTotal *= SingleDimSize;
2496
+ uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.consume <uint64_t >()
2497
+ : ReqdWGSize.consume <uint32_t >();
2498
+ if (auto res = multiply_with_overflow_check (ReqdWGSizeAllDimsTotal,
2499
+ SingleDimSize))
2500
+ ReqdWGSizeAllDimsTotal = *res;
2501
+ else
2502
+ return sycl::exception (
2503
+ sycl::errc::kernel_not_supported,
2504
+ " Required work-group size is not supported"
2505
+ " (total number of work-items requested can't fit into size_t)" );
2502
2506
ReqdWGSizeVec.push_back (SingleDimSize);
2503
2507
Dims++;
2504
2508
}
2505
- if (static_cast <size_t >(ReqdWGSizeAllDimsTotal) >
2506
- Dev.get_info <info::device::max_work_group_size>())
2507
- return false ;
2509
+
2510
+ // The SingleDimSize was computed in an uint64_t; size_t does not
2511
+ // necessarily have to be the same uint64_t (but should fit in an
2512
+ // uint64_t).
2513
+ if (ReqdWGSizeAllDimsTotal >
2514
+ Dev.get_info <info::device::max_work_group_size>() ||
2515
+ ReqdWGSizeAllDimsTotal > std::numeric_limits<size_t >::max ())
2516
+ return sycl::exception (sycl::errc::kernel_not_supported,
2517
+ " Required work-group size " +
2518
+ std::to_string (ReqdWGSizeAllDimsTotal) +
2519
+ " is not supported on the device" );
2508
2520
// Creating std::variant to call max_work_item_sizes one time to avoid
2509
2521
// performance drop
2510
2522
std::variant<id<1 >, id<2 >, id<3 >> MaxWorkItemSizesVariant;
@@ -2522,17 +2534,26 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
2522
2534
// issues after that
2523
2535
if (Dims == 1 ) {
2524
2536
// ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes
2525
- if (static_cast < size_t >( ReqdWGSizeVec[i]) >
2537
+ if (ReqdWGSizeVec[i] >
2526
2538
std::get<id<1 >>(MaxWorkItemSizesVariant)[Dims - i - 1 ])
2527
- return false ;
2539
+ return sycl::exception (sycl::errc::kernel_not_supported,
2540
+ " Required work-group size " +
2541
+ std::to_string (ReqdWGSizeVec[i]) +
2542
+ " is not supported" );
2528
2543
} else if (Dims == 2 ) {
2529
- if (static_cast < size_t >( ReqdWGSizeVec[i]) >
2544
+ if (ReqdWGSizeVec[i] >
2530
2545
std::get<id<2 >>(MaxWorkItemSizesVariant)[Dims - i - 1 ])
2531
- return false ;
2546
+ return sycl::exception (sycl::errc::kernel_not_supported,
2547
+ " Required work-group size " +
2548
+ std::to_string (ReqdWGSizeVec[i]) +
2549
+ " is not supported" );
2532
2550
} else // (Dims == 3)
2533
- if (static_cast < size_t >( ReqdWGSizeVec[i]) >
2551
+ if (ReqdWGSizeVec[i] >
2534
2552
std::get<id<3 >>(MaxWorkItemSizesVariant)[Dims - i - 1 ])
2535
- return false ;
2553
+ return sycl::exception (sycl::errc::kernel_not_supported,
2554
+ " Required work-group size " +
2555
+ std::to_string (ReqdWGSizeVec[i]) +
2556
+ " is not supported" );
2536
2557
}
2537
2558
}
2538
2559
@@ -2541,14 +2562,21 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
2541
2562
auto ReqdSubGroupSize =
2542
2563
DeviceBinaryProperty (*(ReqdSubGroupSizePropIt.value ())).asUint32 ();
2543
2564
auto SupportedSubGroupSizes = Dev.get_info <info::device::sub_group_sizes>();
2565
+ // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD,
2566
+ // as ESIMD images have a reqd-sub-group-size of 1, but currently
2567
+ // no backend currently includes 1 as a valid sub-group size.
2568
+ // This can be removed if backends add 1 as a valid sub-group size.
2544
2569
if (!getUint32PropAsBool (Img, " isEsimdImage" ) &&
2545
2570
std::none_of (SupportedSubGroupSizes.cbegin (),
2546
2571
SupportedSubGroupSizes.cend (),
2547
2572
[=](auto s) { return s == ReqdSubGroupSize; }))
2548
- return false ;
2573
+ return sycl::exception (sycl::errc::kernel_not_supported,
2574
+ " Sub-group size " +
2575
+ std::to_string (ReqdSubGroupSize) +
2576
+ " is not supported on the device" );
2549
2577
}
2550
2578
2551
- return true ;
2579
+ return {} ;
2552
2580
}
2553
2581
2554
2582
} // namespace detail
0 commit comments