@@ -1824,6 +1824,7 @@ def dpnp_solve(a, b):
1824
1824
return dpnp .empty_like (b , dtype = res_type , usm_type = res_usm_type )
1825
1825
1826
1826
if a .ndim > 2 :
1827
+ is_cpu_device = exec_q .sycl_device .has_aspect_cpu
1827
1828
reshape = False
1828
1829
orig_shape_b = b_shape
1829
1830
if a .ndim > 3 :
@@ -1850,22 +1851,27 @@ def dpnp_solve(a, b):
1850
1851
for i in range (batch_size ):
1851
1852
# oneMKL LAPACK assumes fortran-like array as input, so
1852
1853
# allocate a memory with 'F' order for dpnp array of coefficient matrix
1853
- # and multiple dependent variables array
1854
1854
coeff_vecs [i ] = dpnp .empty_like (
1855
1855
a [i ], order = "F" , dtype = res_type , usm_type = res_usm_type
1856
1856
)
1857
- val_vecs [i ] = dpnp .empty_like (
1858
- b [i ], order = "F" , dtype = res_type , usm_type = res_usm_type
1859
- )
1860
1857
1861
1858
# use DPCTL tensor function to fill the coefficient matrix array
1862
- # and the array of multiple dependent variables with content
1863
- # from the input arrays
1859
+ # with content from the input array
1864
1860
a_ht_copy_ev [i ], a_copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
1865
1861
src = a_usm_arr [i ],
1866
1862
dst = coeff_vecs [i ].get_array (),
1867
1863
sycl_queue = a .sycl_queue ,
1868
1864
)
1865
+
1866
+ # oneMKL LAPACK assumes fortran-like array as input, so
1867
+ # allocate memory with 'F' order for the dpnp array of multiple
1868
+ # dependent variables
1869
+ val_vecs [i ] = dpnp .empty_like (
1870
+ b [i ], order = "F" , dtype = res_type , usm_type = res_usm_type
1871
+ )
1872
+
1873
+ # use DPCTL tensor function to fill the array of multiple dependent
1874
+ # variables with content from the input arrays
1869
1875
b_ht_copy_ev [i ], b_copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
1870
1876
src = b_usm_arr [i ],
1871
1877
dst = val_vecs [i ].get_array (),
@@ -1882,6 +1888,15 @@ def dpnp_solve(a, b):
1882
1888
depends = [a_copy_ev , b_copy_ev ],
1883
1889
)
1884
1890
1891
+ # TODO: Remove this workaround when MKLD-17201 is solved.
1892
+ # Waiting for a host task executing a oneMKL LAPACK gesv call
1893
+ # on CPU causes deadlock due to serialization of all host tasks
1894
+ # in the queue.
1895
+ # We need to wait for each host task before calling _gesv to avoid deadlock.
1896
+ if is_cpu_device :
1897
+ ht_lapack_ev [i ].wait ()
1898
+ b_ht_copy_ev [i ].wait ()
1899
+
1885
1900
for i in range (batch_size ):
1886
1901
ht_lapack_ev [i ].wait ()
1887
1902
b_ht_copy_ev [i ].wait ()
0 commit comments