@@ -1617,6 +1617,65 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) {
1617
1617
}
1618
1618
}
1619
1619
1620
+ pi_result ExecCGCommand::SetKernelParamsAndLaunch (
1621
+ CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
1622
+ std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event) {
1623
+ const detail::plugin &Plugin = MQueue->getPlugin ();
1624
+ for (ArgDesc &Arg : ExecKernel->MArgs ) {
1625
+ switch (Arg.MType ) {
1626
+ case kernel_param_kind_t ::kind_accessor: {
1627
+ Requirement *Req = (Requirement *)(Arg.MPtr );
1628
+ AllocaCommandBase *AllocaCmd = getAllocaForReq (Req);
1629
+ RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation ();
1630
+ if (Plugin.getBackend () == backend::opencl) {
1631
+ Plugin.call <PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex ,
1632
+ sizeof (RT::PiMem), &MemArg);
1633
+ } else {
1634
+ Plugin.call <PiApiKind::piextKernelSetArgMemObj>(Kernel, Arg.MIndex ,
1635
+ &MemArg);
1636
+ }
1637
+ break ;
1638
+ }
1639
+ case kernel_param_kind_t ::kind_std_layout: {
1640
+ Plugin.call <PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex , Arg.MSize ,
1641
+ Arg.MPtr );
1642
+ break ;
1643
+ }
1644
+ case kernel_param_kind_t ::kind_sampler: {
1645
+ sampler *SamplerPtr = (sampler *)Arg.MPtr ;
1646
+ RT::PiSampler Sampler = detail::getSyclObjImpl (*SamplerPtr)
1647
+ ->getOrCreateSampler (MQueue->get_context ());
1648
+ Plugin.call <PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex ,
1649
+ sizeof (cl_sampler), &Sampler);
1650
+ break ;
1651
+ }
1652
+ case kernel_param_kind_t ::kind_pointer: {
1653
+ Plugin.call <PiApiKind::piextKernelSetArgPointer>(Kernel, Arg.MIndex ,
1654
+ Arg.MSize , Arg.MPtr );
1655
+ break ;
1656
+ }
1657
+ }
1658
+ }
1659
+
1660
+ adjustNDRangePerKernel (NDRDesc, Kernel,
1661
+ *(detail::getSyclObjImpl (MQueue->get_device ())));
1662
+
1663
+ // Some PI Plugins (like OpenCL) require this call to enable USM
1664
+ // For others, PI will turn this into a NOP.
1665
+ Plugin.call <PiApiKind::piKernelSetExecInfo>(Kernel, PI_USM_INDIRECT_ACCESS,
1666
+ sizeof (pi_bool), &PI_TRUE);
1667
+
1668
+ // Remember this information before the range dimensions are reversed
1669
+ const bool HasLocalSize = (NDRDesc.LocalSize [0 ] != 0 );
1670
+
1671
+ ReverseRangeDimensionsForKernel (NDRDesc);
1672
+ pi_result Error = Plugin.call_nocheck <PiApiKind::piEnqueueKernelLaunch>(
1673
+ MQueue->getHandleRef (), Kernel, NDRDesc.Dims , &NDRDesc.GlobalOffset [0 ],
1674
+ &NDRDesc.GlobalSize [0 ], HasLocalSize ? &NDRDesc.LocalSize [0 ] : nullptr ,
1675
+ RawEvents.size (), RawEvents.empty () ? nullptr : &RawEvents[0 ], &Event);
1676
+ return Error;
1677
+ }
1678
+
1620
1679
// The function initializes accessors and calls the lambda.
1621
1680
// The function is used as argument to piEnqueueNativeKernel which requires
1622
1681
// that the passed function takes one void* argument.
@@ -1823,73 +1882,15 @@ cl_int ExecCGCommand::enqueueImp() {
1823
1882
nullptr );
1824
1883
}
1825
1884
1826
- auto SetKernelParamsAndLaunch = [this , &ExecKernel, &Kernel, &NDRDesc,
1827
- &RawEvents, &Event] {
1828
- const detail::plugin &Plugin = MQueue->getPlugin ();
1829
- for (ArgDesc &Arg : ExecKernel->MArgs ) {
1830
- switch (Arg.MType ) {
1831
- case kernel_param_kind_t ::kind_accessor: {
1832
- Requirement *Req = (Requirement *)(Arg.MPtr );
1833
- AllocaCommandBase *AllocaCmd = getAllocaForReq (Req);
1834
- RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation ();
1835
- if (Plugin.getBackend () == backend::opencl) {
1836
- Plugin.call <PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex ,
1837
- sizeof (RT::PiMem), &MemArg);
1838
- } else {
1839
- Plugin.call <PiApiKind::piextKernelSetArgMemObj>(Kernel, Arg.MIndex ,
1840
- &MemArg);
1841
- }
1842
- break ;
1843
- }
1844
- case kernel_param_kind_t ::kind_std_layout: {
1845
- Plugin.call <PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex , Arg.MSize ,
1846
- Arg.MPtr );
1847
- break ;
1848
- }
1849
- case kernel_param_kind_t ::kind_sampler: {
1850
- sampler *SamplerPtr = (sampler *)Arg.MPtr ;
1851
- RT::PiSampler Sampler =
1852
- detail::getSyclObjImpl (*SamplerPtr)
1853
- ->getOrCreateSampler (MQueue->get_context ());
1854
- Plugin.call <PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex ,
1855
- sizeof (cl_sampler), &Sampler);
1856
- break ;
1857
- }
1858
- case kernel_param_kind_t ::kind_pointer: {
1859
- Plugin.call <PiApiKind::piextKernelSetArgPointer>(Kernel, Arg.MIndex ,
1860
- Arg.MSize , Arg.MPtr );
1861
- break ;
1862
- }
1863
- }
1864
- }
1865
-
1866
- adjustNDRangePerKernel (NDRDesc, Kernel,
1867
- *(detail::getSyclObjImpl (MQueue->get_device ())));
1868
-
1869
- // Some PI Plugins (like OpenCL) require this call to enable USM
1870
- // For others, PI will turn this into a NOP.
1871
- Plugin.call <PiApiKind::piKernelSetExecInfo>(
1872
- Kernel, PI_USM_INDIRECT_ACCESS, sizeof (pi_bool), &PI_TRUE);
1873
-
1874
- // Remember this information before the range dimensions are reversed
1875
- const bool HasLocalSize = (NDRDesc.LocalSize [0 ] != 0 );
1876
-
1877
- ReverseRangeDimensionsForKernel (NDRDesc);
1878
- pi_result Error = Plugin.call_nocheck <PiApiKind::piEnqueueKernelLaunch>(
1879
- MQueue->getHandleRef (), Kernel, NDRDesc.Dims ,
1880
- &NDRDesc.GlobalOffset [0 ], &NDRDesc.GlobalSize [0 ],
1881
- HasLocalSize ? &NDRDesc.LocalSize [0 ] : nullptr , RawEvents.size (),
1882
- RawEvents.empty () ? nullptr : &RawEvents[0 ], &Event);
1883
- return Error;
1884
- };
1885
-
1886
1885
pi_result Error = PI_SUCCESS;
1887
1886
if (KernelMutex != nullptr ) {
1888
1887
// For cacheable kernels, we use per-kernel mutex
1889
1888
std::lock_guard<std::mutex> Lock (*KernelMutex);
1890
- Error = SetKernelParamsAndLaunch ();
1889
+ Error = SetKernelParamsAndLaunch (ExecKernel, Kernel, NDRDesc, RawEvents,
1890
+ Event);
1891
1891
} else {
1892
- Error = SetKernelParamsAndLaunch ();
1892
+ Error = SetKernelParamsAndLaunch (ExecKernel, Kernel, NDRDesc, RawEvents,
1893
+ Event);
1893
1894
}
1894
1895
1895
1896
if (PI_SUCCESS != Error) {
0 commit comments