@@ -955,7 +955,15 @@ def nansum(
955
955
956
956
957
957
def nanstd (
958
- a , axis = None , dtype = None , out = None , ddof = 0 , keepdims = False , * , where = True
958
+ a ,
959
+ axis = None ,
960
+ dtype = None ,
961
+ out = None ,
962
+ ddof = 0 ,
963
+ keepdims = False ,
964
+ * ,
965
+ where = True ,
966
+ mean = None ,
959
967
):
960
968
"""
961
969
Compute the standard deviation along the specified axis,
@@ -969,40 +977,46 @@ def nanstd(
969
977
Input array.
970
978
axis : {None, int, tuple of ints}, optional
971
979
Axis or axes along which the standard deviations must be computed.
972
- If a tuple of unique integers is given, the standard deviations
973
- are computed over multiple axes. If ``None``, the standard deviation
974
- is computed over the entire array.
980
+ If a tuple of unique integers is given, the standard deviations are
981
+ computed over multiple axes. If ``None``, the standard deviation is
982
+ computed over the entire array.
975
983
Default: ``None``.
976
984
dtype : {None, dtype}, optional
977
- Type to use in computing the standard deviation. By default,
978
- if `a` has a floating-point data type, the returned array
979
- will have the same data type as `a`.
980
- If `a` has a boolean or integral data type, the returned array
981
- will have the default floating point data type for the device
985
+ Type to use in computing the standard deviation. By default, if `a` has
986
+ a floating-point data type, the returned array will have the same data
987
+ type as `a`. If `a` has a boolean or integral data type, the returned
988
+ array will have the default floating point data type for the device
982
989
where input array `a` is allocated.
990
+ Default: ``None``.
983
991
out : {None, dpnp.ndarray, usm_ndarray}, optional
984
992
Alternative output array in which to place the result. It must have
985
993
the same shape as the expected output but the type (of the calculated
986
994
values) will be cast if necessary.
995
+ Default: ``None``.
987
996
ddof : {int, float}, optional
988
- Means Delta Degrees of Freedom. The divisor used in calculations
989
- is ``N - ddof``, where ``N`` the number of non-NaN elements.
990
- Default: `0.0`.
997
+ Means Delta Degrees of Freedom. The divisor used in calculations is
998
+ ``N - ddof``, where ``N`` represents the number of non-NaN elements.
999
+ Default: ``0.0``.
991
1000
keepdims : {None, bool}, optional
992
1001
If ``True``, the reduced axes (dimensions) are included in the result
993
- as singleton dimensions, so that the returned array remains
994
- compatible with the input array according to Array Broadcasting
995
- rules. Otherwise, if ``False``, the reduced axes are not included in
996
- the returned array. Default: ``False``.
1002
+ as singleton dimensions, so that the returned array remains compatible
1003
+ with the input array according to Array Broadcasting rules. Otherwise,
1004
+ if ``False``, the reduced axes are not included in the returned array.
1005
+ Default: ``False``.
1006
+ mean : {dpnp.ndarray, usm_ndarray}, optional
1007
+ Provide the mean to prevent its recalculation. The mean should have
1008
+ a shape as if it was calculated with ``keepdims=True``.
1009
+ The axis for the calculation of the mean should be the same as used in
1010
+ the call to this `nanstd` function.
1011
+ Default: ``None``.
997
1012
998
1013
Returns
999
1014
-------
1000
1015
out : dpnp.ndarray
1001
- An array containing the standard deviations. If the standard
1002
- deviation was computed over the entire array, a zero-dimensional
1003
- array is returned. If `ddof` is >= the number of non-NaN elements
1004
- in a slice or the slice contains only NaNs, then the result for
1005
- that slice is NaN.
1016
+ An array containing the standard deviations. If the standard deviation
1017
+ was computed over the entire array, a zero-dimensional array is
1018
+ returned. If `ddof` is >= the number of non-NaN elements in a slice or
1019
+ the slice contains only NaNs, then the result for that slice is NaN.
1006
1020
1007
1021
Limitations
1008
1022
-----------
@@ -1011,6 +1025,19 @@ def nanstd(
1011
1025
1012
1026
Notes
1013
1027
-----
1028
+ The standard deviation is the square root of the average of the squared
1029
+ deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.
1030
+
1031
+ The average squared deviation is normally calculated as ``x.sum() / N``,
1032
+ where ``N = len(x)``. If, however, `ddof` is specified, the divisor
1033
+ ``N - ddof`` is used instead. In standard statistical practice, ``ddof=1``
1034
+ provides an unbiased estimator of the variance of the infinite population.
1035
+ ``ddof=0`` provides a maximum likelihood estimate of the variance for
1036
+ normally distributed variables.
1037
+ The standard deviation computed in this function is the square root of
1038
+ the estimated variance, so even with ``ddof=1``, it will not be an unbiased
1039
+ estimate of the standard deviation per se.
1040
+
1014
1041
Note that, for complex numbers, the absolute value is taken before
1015
1042
squaring, so that the result is always real and non-negative.
1016
1043
@@ -1029,11 +1056,18 @@ def nanstd(
1029
1056
>>> import dpnp as np
1030
1057
>>> a = np.array([[1, np.nan], [3, 4]])
1031
1058
>>> np.nanstd(a)
1032
- array(1.247219128924647 )
1059
+ array(1.24721913 )
1033
1060
>>> np.nanstd(a, axis=0)
1034
- array([1., 0.])
1061
+ array([1., 0.])
1035
1062
>>> np.nanstd(a, axis=1)
1036
- array([0., 0.5]) # may vary
1063
+ array([0. , 0.5]) # may vary
1064
+
1065
+ Using the mean keyword to save computation time:
1066
+
1067
+ >>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]])
1068
+ >>> mean = np.nanmean(a, axis=1, keepdims=True)
1069
+ >>> np.nanstd(a, axis=1, mean=mean)
1070
+ array([2.49443826, 1.47901995, 4.0824829 ])
1037
1071
1038
1072
"""
1039
1073
@@ -1051,13 +1085,21 @@ def nanstd(
1051
1085
ddof = ddof ,
1052
1086
keepdims = keepdims ,
1053
1087
where = where ,
1088
+ mean = mean ,
1054
1089
)
1055
- dpnp .sqrt (res , out = res )
1056
- return res
1090
+ return dpnp .sqrt (res , out = res )
1057
1091
1058
1092
1059
1093
def nanvar (
1060
- a , axis = None , dtype = None , out = None , ddof = 0 , keepdims = False , * , where = True
1094
+ a ,
1095
+ axis = None ,
1096
+ dtype = None ,
1097
+ out = None ,
1098
+ ddof = 0 ,
1099
+ keepdims = False ,
1100
+ * ,
1101
+ where = True ,
1102
+ mean = None ,
1061
1103
):
1062
1104
"""
1063
1105
Compute the variance along the specified axis, while ignoring NaNs.
@@ -1069,39 +1111,46 @@ def nanvar(
1069
1111
a : {dpnp.ndarray, usm_ndarray}
1070
1112
Input array.
1071
1113
axis : {None, int, tuple of ints}, optional
1072
- axis or axes along which the variances must be computed. If a tuple
1114
+ Axis or axes along which the variances must be computed. If a tuple
1073
1115
of unique integers is given, the variances are computed over multiple
1074
1116
axes. If ``None``, the variance is computed over the entire array.
1075
1117
Default: ``None``.
1076
1118
dtype : {None, dtype}, optional
1077
1119
Type to use in computing the variance. By default, if `a` has a
1078
1120
floating-point data type, the returned array will have
1079
- the same data type as `a`.
1080
- If `a` has a boolean or integral data type, the returned array
1081
- will have the default floating point data type for the device
1082
- where input array `a` is allocated .
1121
+ the same data type as `a`. If `a` has a boolean or integral data type,
1122
+ the returned array will have the default floating point data type for
1123
+ the device where input array `a` is allocated.
1124
+ Default: ``None``.
1083
1125
out : {None, dpnp.ndarray, usm_ndarray}, optional
1084
1126
Alternative output array in which to place the result. It must have
1085
1127
the same shape as the expected output but the type (of the calculated
1086
1128
values) will be cast if necessary.
1129
+ Default: ``None``.
1087
1130
ddof : {int, float}, optional
1088
- Means Delta Degrees of Freedom. The divisor used in calculations
1089
- is ``N - ddof``, where ``N`` represents the number of non-NaN elements.
1090
- Default: `0.0`.
1131
+ Means Delta Degrees of Freedom. The divisor used in calculations is
1132
+ ``N - ddof``, where ``N`` represents the number of non-NaN elements.
1133
+ Default: ``0.0``.
1091
1134
keepdims : {None, bool}, optional
1092
1135
If ``True``, the reduced axes (dimensions) are included in the result
1093
- as singleton dimensions, so that the returned array remains
1094
- compatible with the input array according to Array Broadcasting
1095
- rules. Otherwise, if ``False``, the reduced axes are not included in
1096
- the returned array. Default: ``False``.
1136
+ as singleton dimensions, so that the returned array remains compatible
1137
+ with the input array according to Array Broadcasting rules. Otherwise,
1138
+ if ``False``, the reduced axes are not included in the returned array.
1139
+ Default: ``False``.
1140
+ mean : {dpnp.ndarray, usm_ndarray}, optional
1141
+ Provide the mean to prevent its recalculation. The mean should have
1142
+ a shape as if it was calculated with ``keepdims=True``.
1143
+ The axis for the calculation of the mean should be the same as used in
1144
+ the call to this `nanvar` function.
1145
+ Default: ``None``.
1097
1146
1098
1147
Returns
1099
1148
-------
1100
1149
out : dpnp.ndarray
1101
- An array containing the variances. If the variance was computed
1102
- over the entire array, a zero-dimensional array is returned.
1103
- If `ddof` is >= the number of non-NaN elements in a slice or the
1104
- slice contains only NaNs, then the result for that slice is NaN.
1150
+ An array containing the variances. If the variance was computed over
1151
+ the entire array, a zero-dimensional array is returned. If `ddof` is >=
1152
+ the number of non-NaN elements in a slice or the slice contains only
1153
+ NaNs, then the result for that slice is NaN.
1105
1154
1106
1155
Limitations
1107
1156
-----------
@@ -1110,6 +1159,16 @@ def nanvar(
1110
1159
1111
1160
Notes
1112
1161
-----
1162
+ The variance is the average of the squared deviations from the mean,
1163
+ that is ``var = mean(abs(x - x.mean())**2)``.
1164
+
1165
+ The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
1166
+ If, however, `ddof` is specified, the divisor ``N - ddof`` is used instead.
1167
+ In standard statistical practice, ``ddof=1`` provides an unbiased estimator
1168
+ of the variance of a hypothetical infinite population. ``ddof=0`` provides
1169
+ a maximum likelihood estimate of the variance for normally distributed
1170
+ variables.
1171
+
1113
1172
Note that, for complex numbers, the absolute value is taken before squaring,
1114
1173
so that the result is always real and non-negative.
1115
1174
@@ -1127,11 +1186,18 @@ def nanvar(
1127
1186
>>> import dpnp as np
1128
1187
>>> a = np.array([[1, np.nan], [3, 4]])
1129
1188
>>> np.nanvar(a)
1130
- array(1.5555555555555554 )
1189
+ array(1.55555556 )
1131
1190
>>> np.nanvar(a, axis=0)
1132
- array([1., 0.])
1191
+ array([1., 0.])
1133
1192
>>> np.nanvar(a, axis=1)
1134
- array([0., 0.25]) # may vary
1193
+ array([0. , 0.25]) # may vary
1194
+
1195
+ Using the mean keyword to save computation time:
1196
+
1197
+ >>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]])
1198
+ >>> mean = np.nanmean(a, axis=1, keepdims=True)
1199
+ >>> np.nanvar(a, axis=1, mean=mean)
1200
+ array([ 6.22222222, 2.1875 , 16.66666667])
1135
1201
1136
1202
"""
1137
1203
@@ -1157,46 +1223,51 @@ def nanvar(
1157
1223
dtype = dpnp .dtype (dtype )
1158
1224
if not dpnp .issubdtype (dtype , dpnp .inexact ):
1159
1225
raise TypeError ("If input is inexact, then dtype must be inexact." )
1226
+
1160
1227
if out is not None :
1161
1228
dpnp .check_supported_arrays_type (out )
1162
1229
if not dpnp .issubdtype (out .dtype , dpnp .inexact ):
1163
1230
raise TypeError ("If input is inexact, then out must be inexact." )
1164
1231
1165
1232
# Compute mean
1166
- var_dtype = a .real .dtype if dtype is None else dtype
1167
1233
cnt = dpnp .sum (
1168
- ~ mask , axis = axis , dtype = var_dtype , keepdims = True , where = where
1234
+ ~ mask , axis = axis , dtype = dpnp . intp , keepdims = True , where = where
1169
1235
)
1170
- avg = dpnp .sum (arr , axis = axis , dtype = dtype , keepdims = True , where = where )
1171
- avg = dpnp .divide (avg , cnt , out = avg )
1172
1236
1173
- # Compute squared deviation from mean.
1237
+ if mean is not None :
1238
+ avg = mean
1239
+ else :
1240
+ avg = dpnp .sum (arr , axis = axis , dtype = dtype , keepdims = True , where = where )
1241
+ avg = dpnp .divide (avg , cnt , out = avg )
1242
+
1243
+ # Compute squared deviation from mean
1174
1244
if arr .dtype == avg .dtype :
1175
1245
arr = dpnp .subtract (arr , avg , out = arr )
1176
1246
else :
1177
1247
arr = dpnp .subtract (arr , avg )
1178
1248
dpnp .copyto (arr , 0.0 , where = mask )
1249
+
1179
1250
if dpnp .issubdtype (arr .dtype , dpnp .complexfloating ):
1180
1251
sqr = dpnp .multiply (arr , arr .conj (), out = arr ).real
1181
1252
else :
1182
- sqr = dpnp .multiply ( arr , arr , out = arr )
1253
+ sqr = dpnp .square ( arr , out = arr )
1183
1254
1184
1255
# Compute variance
1185
1256
var = dpnp .sum (
1186
1257
sqr ,
1187
1258
axis = axis ,
1188
- dtype = var_dtype ,
1259
+ dtype = dtype ,
1189
1260
out = out ,
1190
1261
keepdims = keepdims ,
1191
1262
where = where ,
1192
1263
)
1193
1264
1194
1265
if var .ndim < cnt .ndim :
1195
1266
cnt = cnt .squeeze (axis )
1196
- cnt -= ddof
1197
- dpnp .divide (var , cnt , out = var )
1267
+ dof = cnt - ddof
1268
+ dpnp .divide (var , dof , out = var )
1198
1269
1199
- isbad = cnt <= 0
1270
+ isbad = dof <= 0
1200
1271
if dpnp .any (isbad ):
1201
1272
# NaN, inf, or negative numbers are all possible bad
1202
1273
# values, so explicitly replace them with NaN.
0 commit comments