@@ -31,8 +31,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
31
31
ze_kernel_handle_t ZeKernel{};
32
32
UR_CALL (getZeKernel (Legacy (hQueue)->Device ->ZeDevice , hKernel, &ZeKernel));
33
33
34
- UR_CALL (getSuggestedLocalWorkSize (Legacy (hQueue), ZeKernel, GlobalWorkSize3D ,
35
- LocalWorkSize));
34
+ UR_CALL (getSuggestedLocalWorkSize (Legacy (hQueue)-> Device , ZeKernel,
35
+ GlobalWorkSize3D, LocalWorkSize));
36
36
37
37
std::copy (LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize);
38
38
return UR_RESULT_SUCCESS;
@@ -54,52 +54,6 @@ ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel,
54
54
return UR_RESULT_SUCCESS;
55
55
}
56
56
57
- ur_result_t getSuggestedLocalWorkSize (ur_queue_handle_legacy_t hQueue,
58
- ze_kernel_handle_t hZeKernel,
59
- size_t GlobalWorkSize3D[3 ],
60
- uint32_t SuggestedLocalWorkSize3D[3 ]) {
61
- uint32_t *WG = SuggestedLocalWorkSize3D;
62
-
63
- // We can't call to zeKernelSuggestGroupSize if 64-bit GlobalWorkSize
64
- // values do not fit to 32-bit that the API only supports currently.
65
- bool SuggestGroupSize = true ;
66
- for (int I : {0 , 1 , 2 }) {
67
- if (GlobalWorkSize3D[I] > UINT32_MAX) {
68
- SuggestGroupSize = false ;
69
- }
70
- }
71
- if (SuggestGroupSize) {
72
- ZE2UR_CALL (zeKernelSuggestGroupSize,
73
- (hZeKernel, GlobalWorkSize3D[0 ], GlobalWorkSize3D[1 ],
74
- GlobalWorkSize3D[2 ], &WG[0 ], &WG[1 ], &WG[2 ]));
75
- } else {
76
- for (int I : {0 , 1 , 2 }) {
77
- // Try to find a I-dimension WG size that the GlobalWorkSize[I] is
78
- // fully divisable with. Start with the max possible size in
79
- // each dimension.
80
- uint32_t GroupSize[] = {
81
- hQueue->Device ->ZeDeviceComputeProperties ->maxGroupSizeX ,
82
- hQueue->Device ->ZeDeviceComputeProperties ->maxGroupSizeY ,
83
- hQueue->Device ->ZeDeviceComputeProperties ->maxGroupSizeZ };
84
- GroupSize[I] = (std::min)(size_t (GroupSize[I]), GlobalWorkSize3D[I]);
85
- while (GlobalWorkSize3D[I] % GroupSize[I]) {
86
- --GroupSize[I];
87
- }
88
- if (GlobalWorkSize3D[I] / GroupSize[I] > UINT32_MAX) {
89
- logger::error (" getSuggestedLocalWorkSize: can't find a WG size "
90
- " suitable for global work size > UINT32_MAX" );
91
- return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
92
- }
93
- WG[I] = GroupSize[I];
94
- }
95
- logger::debug (
96
- " getSuggestedLocalWorkSize: using computed WG size = {{{}, {}, {}}}" ,
97
- WG[0 ], WG[1 ], WG[2 ]);
98
- }
99
-
100
- return UR_RESULT_SUCCESS;
101
- }
102
-
103
57
ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch (
104
58
ur_kernel_handle_t Kernel, // /< [in] handle of the kernel object
105
59
uint32_t WorkDim, // /< [in] number of dimensions, from 1 to 3, to specify
0 commit comments