@@ -476,22 +476,21 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
476
476
// (e.g. clGetKernelWorkGroupInfo returns CL_INVALID_KERNEL if kernel was
477
477
// created from the program built for sub-device and re-used either on root or
478
478
// other sub-device).
479
- // To work around this case we optimize only one case: root device shares the
480
- // same context with its sub-device(s). We built for the root device and
481
- // cache the results.
482
- // The expected solution is to build for any sub-device and use root device
483
- // handle as cache key to share build results for any other sub-device or even
484
- // a root device.
485
- // TODO: it might be worth testing if Level Zero plug-in supports all cases
486
- // and enable more cases for Level Zero.
479
+ // To work around this case we optimize only one case for non-GPU devices:
480
+ // root device shares the same context with its sub-device(s). We build for
481
+ // the root device and cache the results.
482
+ // The solution for other devices is to build for any sub-device and use root
483
+ // device handle as cache key to share build results for any other sub-device
484
+ // or even a root device.
487
485
DeviceImplPtr Dev = DeviceImpl;
488
- while (!Dev->isRootDevice ()) {
489
- auto ParentDev =
490
- detail::getSyclObjImpl (Dev->get_info <info::device::parent_device>());
491
- if (!ContextImpl->hasDevice (ParentDev))
492
- break ;
493
- Dev = ParentDev;
494
- }
486
+ if (!Dev->is_gpu ())
487
+ while (!Dev->isRootDevice ()) {
488
+ auto ParentDev =
489
+ detail::getSyclObjImpl (Dev->get_info <info::device::parent_device>());
490
+ if (!ContextImpl->hasDevice (ParentDev))
491
+ break ;
492
+ Dev = ParentDev;
493
+ }
495
494
496
495
auto BuildF = [this , &M, &KSId, &ContextImpl, &Dev, Prg, &CompileOpts,
497
496
&LinkOpts, &JITCompilationIsRequired, SpecConsts] {
@@ -546,6 +545,16 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
546
545
return BuiltProgram.release ();
547
546
};
548
547
548
+ // Use root device as a cache key
549
+ // FIXME: on CPU we can't re-use results unless "Dev" is a root device already
550
+ // due to an Intel OpenCL CPU bug(?). This solution is tested only on the Intel GPU
551
+ // implementation.
552
+ if (Dev->is_gpu ())
553
+ while (!Dev->isRootDevice ()) {
554
+ auto ParentDev =
555
+ detail::getSyclObjImpl (Dev->get_info <info::device::parent_device>());
556
+ Dev = ParentDev;
557
+ }
549
558
const RT::PiDevice PiDevice = Dev->getHandleRef ();
550
559
551
560
auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
0 commit comments