@@ -857,72 +857,91 @@ def dpnp_det(a):
857
857
return det .reshape (shape )
858
858
859
859
860
- def dpnp_eigh (a , UPLO ):
860
+ def dpnp_eigh (a , UPLO , eigen_mode = "V" ):
861
861
"""
862
- dpnp_eigh(a, UPLO)
862
+ dpnp_eigh(a, UPLO, eigen_mode="V" )
863
863
864
864
Return the eigenvalues and eigenvectors of a complex Hermitian
865
865
(conjugate symmetric) or a real symmetric matrix.
866
+ Can return both eigenvalues and eigenvectors (`eigen_mode="V"`) or
867
+ only eigenvalues (`eigen_mode="N"`).
866
868
867
869
The main calculation is done by calling an extension function
868
870
for LAPACK library of OneMKL. Depending on input type of `a` array,
869
871
it will be either ``heevd`` (for complex types) or ``syevd`` (for others).
870
872
871
873
"""
872
874
873
- a_usm_type = a .usm_type
874
- a_sycl_queue = a .sycl_queue
875
- a_order = "C" if a .flags .c_contiguous else "F"
876
- a_usm_arr = dpnp .get_usm_ndarray (a )
875
+ # get resulting type of arrays with eigenvalues and eigenvectors
876
+ v_type = _common_type (a )
877
+ w_type = _real_type (v_type )
877
878
878
- # 'V' means both eigenvectors and eigenvalues will be calculated
879
- jobz = _jobz ["V" ]
879
+ if a .size == 0 :
880
+ w = dpnp .empty_like (a , shape = a .shape [:- 1 ], dtype = w_type )
881
+ if eigen_mode == "V" :
882
+ v = dpnp .empty_like (a , dtype = v_type )
883
+ return w , v
884
+ return w
885
+
886
+ # `eigen_mode` can be either "N" or "V", specifying the computation mode
887
+ # for OneMKL LAPACK `syevd` and `heevd` routines.
888
+ # "V" (default) means both eigenvectors and eigenvalues will be calculated
889
+ # "N" means only eigenvalues will be calculated
890
+ jobz = _jobz [eigen_mode ]
880
891
uplo = _upper_lower [UPLO ]
881
892
882
- # get resulting type of arrays with eigenvalues and eigenvectors
883
- a_dtype = a .dtype
884
- lapack_func = "_syevd"
885
- if dpnp .issubdtype (a_dtype , dpnp .complexfloating ):
886
- lapack_func = "_heevd"
887
- v_type = a_dtype
888
- w_type = dpnp .float64 if a_dtype == dpnp .complex128 else dpnp .float32
889
- elif dpnp .issubdtype (a_dtype , dpnp .floating ):
890
- v_type = w_type = a_dtype
891
- elif a_sycl_queue .sycl_device .has_aspect_fp64 :
892
- v_type = w_type = dpnp .float64
893
- else :
894
- v_type = w_type = dpnp .float32
893
+ # Get LAPACK function (_syevd for real or _heevd for complex data types)
894
+ # to compute all eigenvalues and, optionally, all eigenvectors
895
+ lapack_func = (
896
+ "_heevd" if dpnp .issubdtype (v_type , dpnp .complexfloating ) else "_syevd"
897
+ )
898
+
899
+ a_sycl_queue = a .sycl_queue
900
+ a_order = "C" if a .flags .c_contiguous else "F"
895
901
896
902
if a .ndim > 2 :
897
- w = dpnp .empty (
898
- a .shape [:- 1 ],
903
+ is_cpu_device = a .sycl_device .has_aspect_cpu
904
+ orig_shape = a .shape
905
+ # get 3d input array by reshape
906
+ a = a .reshape (- 1 , orig_shape [- 2 ], orig_shape [- 1 ])
907
+ a_usm_arr = dpnp .get_usm_ndarray (a )
908
+
909
+ # allocate a memory for dpnp array of eigenvalues
910
+ w = dpnp .empty_like (
911
+ a ,
912
+ shape = orig_shape [:- 1 ],
899
913
dtype = w_type ,
900
- usm_type = a_usm_type ,
901
- sycl_queue = a_sycl_queue ,
902
914
)
915
+ w_orig_shape = w .shape
916
+ # get 2d dpnp array with eigenvalues by reshape
917
+ w = w .reshape (- 1 , w_orig_shape [- 1 ])
903
918
904
919
# need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A
905
- op_count = a .shape [0 ]
906
- if op_count == 0 :
907
- return w , dpnp .empty_like (a , dtype = v_type )
908
-
909
- eig_vecs = [None ] * op_count
910
- ht_copy_ev = [None ] * op_count
911
- ht_lapack_ev = [None ] * op_count
912
- for i in range (op_count ):
920
+ batch_size = a .shape [0 ]
921
+ eig_vecs = [None ] * batch_size
922
+ ht_list_ev = [None ] * batch_size * 2
923
+ for i in range (batch_size ):
913
924
# oneMKL LAPACK assumes fortran-like array as input, so
914
925
# allocate a memory with 'F' order for dpnp array of eigenvectors
915
926
eig_vecs [i ] = dpnp .empty_like (a [i ], order = "F" , dtype = v_type )
916
927
917
928
# use DPCTL tensor function to fill the array of eigenvectors with content of input array
918
- ht_copy_ev [ i ], copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
929
+ ht_list_ev [ 2 * i ], copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
919
930
src = a_usm_arr [i ],
920
931
dst = eig_vecs [i ].get_array (),
921
932
sycl_queue = a_sycl_queue ,
922
933
)
923
934
935
+ # TODO: Remove this w/a when MKLD-17201 is solved.
936
+ # Waiting for a host task executing an OneMKL LAPACK syevd call
937
+ # on CPU causes deadlock due to serialization of all host tasks
938
+ # in the queue.
939
+ # We need to wait for each host tasks before calling _seyvd to avoid deadlock.
940
+ if lapack_func == "_syevd" and is_cpu_device :
941
+ ht_list_ev [2 * i ].wait ()
942
+
924
943
# call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A
925
- ht_lapack_ev [ i ], _ = getattr (li , lapack_func )(
944
+ ht_list_ev [ 2 * i + 1 ], _ = getattr (li , lapack_func )(
926
945
a_sycl_queue ,
927
946
jobz ,
928
947
uplo ,
@@ -931,29 +950,43 @@ def dpnp_eigh(a, UPLO):
931
950
depends = [copy_ev ],
932
951
)
933
952
934
- for i in range (op_count ):
935
- ht_lapack_ev [i ].wait ()
936
- ht_copy_ev [i ].wait ()
953
+ dpctl .SyclEvent .wait_for (ht_list_ev )
954
+
955
+ w = w .reshape (w_orig_shape )
956
+
957
+ if eigen_mode == "V" :
958
+ # combine the list of eigenvectors into a single array
959
+ v = dpnp .array (eig_vecs , order = a_order ).reshape (orig_shape )
960
+ return w , v
961
+ return w
937
962
938
- # combine the list of eigenvectors into a single array
939
- v = dpnp .array (eig_vecs , order = a_order )
940
- return w , v
941
963
else :
942
- # oneMKL LAPACK assumes fortran-like array as input, so
943
- # allocate a memory with 'F' order for dpnp array of eigenvectors
944
- v = dpnp . empty_like ( a , order = "F" , dtype = v_type )
964
+ a_usm_arr = dpnp . get_usm_ndarray ( a )
965
+ ht_list_ev = []
966
+ copy_ev = dpctl . SyclEvent ( )
945
967
946
- # use DPCTL tensor function to fill the array of eigenvectors with content of input array
947
- ht_copy_ev , copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
948
- src = a_usm_arr , dst = v .get_array (), sycl_queue = a_sycl_queue
949
- )
968
+ # When `eigen_mode == "N"` (jobz == 0), OneMKL LAPACK does not overwrite the input array.
969
+ # If the input array 'a' is already F-contiguous and matches the target data type,
970
+ # we can avoid unnecessary memory allocation and data copying.
971
+ if eigen_mode == "N" and a_order == "F" and a .dtype == v_type :
972
+ v = a
973
+
974
+ else :
975
+ # oneMKL LAPACK assumes fortran-like array as input, so
976
+ # allocate a memory with 'F' order for dpnp array of eigenvectors
977
+ v = dpnp .empty_like (a , order = "F" , dtype = v_type )
978
+
979
+ # use DPCTL tensor function to fill the array of eigenvectors with content of input array
980
+ ht_copy_ev , copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
981
+ src = a_usm_arr , dst = v .get_array (), sycl_queue = a_sycl_queue
982
+ )
983
+ ht_list_ev .append (ht_copy_ev )
950
984
951
985
# allocate a memory for dpnp array of eigenvalues
952
- w = dpnp .empty (
953
- a .shape [:- 1 ],
986
+ w = dpnp .empty_like (
987
+ a ,
988
+ shape = a .shape [:- 1 ],
954
989
dtype = w_type ,
955
- usm_type = a_usm_type ,
956
- sycl_queue = a_sycl_queue ,
957
990
)
958
991
959
992
# call LAPACK extension function to get eigenvalues and eigenvectors of matrix A
@@ -965,8 +998,9 @@ def dpnp_eigh(a, UPLO):
965
998
w .get_array (),
966
999
depends = [copy_ev ],
967
1000
)
1001
+ ht_list_ev .append (ht_lapack_ev )
968
1002
969
- if a_order != "F" :
1003
+ if eigen_mode == "V" and a_order != "F" :
970
1004
# need to align order of eigenvectors with one of input matrix A
971
1005
out_v = dpnp .empty_like (v , order = a_order )
972
1006
ht_copy_out_ev , _ = ti ._copy_usm_ndarray_into_usm_ndarray (
@@ -975,14 +1009,13 @@ def dpnp_eigh(a, UPLO):
975
1009
sycl_queue = a_sycl_queue ,
976
1010
depends = [lapack_ev ],
977
1011
)
978
- ht_copy_out_ev . wait ( )
1012
+ ht_list_ev . append ( ht_copy_out_ev )
979
1013
else :
980
1014
out_v = v
981
1015
982
- ht_lapack_ev .wait ()
983
- ht_copy_ev .wait ()
1016
+ dpctl .SyclEvent .wait_for (ht_list_ev )
984
1017
985
- return w , out_v
1018
+ return ( w , out_v ) if eigen_mode == "V" else w
986
1019
987
1020
988
1021
def dpnp_inv_batched (a , res_type ):
0 commit comments