Skip to content

Commit a5ec191

Browse files
authored
Merge eb07397 into d415fe2
2 parents d415fe2 + eb07397 commit a5ec191

File tree

5 files changed

+688
-582
lines changed

5 files changed

+688
-582
lines changed

dpnp/dpnp_array.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ def std(
17311731
keepdims=False,
17321732
*,
17331733
where=True,
1734+
mean=None,
17341735
):
17351736
"""
17361737
Returns the standard deviation of the array elements, along given axis.
@@ -1739,7 +1740,9 @@ def std(
17391740
17401741
"""
17411742

1742-
return dpnp.std(self, axis, dtype, out, ddof, keepdims, where=where)
1743+
return dpnp.std(
1744+
self, axis, dtype, out, ddof, keepdims, where=where, mean=mean
1745+
)
17431746

17441747
@property
17451748
def strides(self):
@@ -1938,6 +1941,7 @@ def var(
19381941
keepdims=False,
19391942
*,
19401943
where=True,
1944+
mean=None,
19411945
):
19421946
"""
19431947
Returns the variance of the array elements, along given axis.
@@ -1946,7 +1950,9 @@ def var(
19461950
19471951
"""
19481952

1949-
return dpnp.var(self, axis, dtype, out, ddof, keepdims, where=where)
1953+
return dpnp.var(
1954+
self, axis, dtype, out, ddof, keepdims, where=where, mean=mean
1955+
)
19501956

19511957

19521958
# 'view'

dpnp/dpnp_iface_nanfunctions.py

Lines changed: 127 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,15 @@ def nansum(
955955

956956

957957
def nanstd(
958-
a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True
958+
a,
959+
axis=None,
960+
dtype=None,
961+
out=None,
962+
ddof=0,
963+
keepdims=False,
964+
*,
965+
where=True,
966+
mean=None,
959967
):
960968
"""
961969
Compute the standard deviation along the specified axis,
@@ -969,40 +977,46 @@ def nanstd(
969977
Input array.
970978
axis : {None, int, tuple of ints}, optional
971979
Axis or axes along which the standard deviations must be computed.
972-
If a tuple of unique integers is given, the standard deviations
973-
are computed over multiple axes. If ``None``, the standard deviation
974-
is computed over the entire array.
980+
If a tuple of unique integers is given, the standard deviations are
981+
computed over multiple axes. If ``None``, the standard deviation is
982+
computed over the entire array.
975983
Default: ``None``.
976984
dtype : {None, dtype}, optional
977-
Type to use in computing the standard deviation. By default,
978-
if `a` has a floating-point data type, the returned array
979-
will have the same data type as `a`.
980-
If `a` has a boolean or integral data type, the returned array
981-
will have the default floating point data type for the device
985+
Type to use in computing the standard deviation. By default, if `a` has
986+
a floating-point data type, the returned array will have the same data
987+
type as `a`. If `a` has a boolean or integral data type, the returned
988+
array will have the default floating point data type for the device
982989
where input array `a` is allocated.
990+
Default: ``None``.
983991
out : {None, dpnp.ndarray, usm_ndarray}, optional
984992
Alternative output array in which to place the result. It must have
985993
the same shape as the expected output but the type (of the calculated
986994
values) will be cast if necessary.
995+
Default: ``None``.
987996
ddof : {int, float}, optional
988-
Means Delta Degrees of Freedom. The divisor used in calculations
989-
is ``N - ddof``, where ``N`` the number of non-NaN elements.
990-
Default: `0.0`.
997+
Means Delta Degrees of Freedom. The divisor used in calculations is
998+
``N - ddof``, where ``N`` represents the number of non-NaN elements.
999+
Default: ``0.0``.
9911000
keepdims : {None, bool}, optional
9921001
If ``True``, the reduced axes (dimensions) are included in the result
993-
as singleton dimensions, so that the returned array remains
994-
compatible with the input array according to Array Broadcasting
995-
rules. Otherwise, if ``False``, the reduced axes are not included in
996-
the returned array. Default: ``False``.
1002+
as singleton dimensions, so that the returned array remains compatible
1003+
with the input array according to Array Broadcasting rules. Otherwise,
1004+
if ``False``, the reduced axes are not included in the returned array.
1005+
Default: ``False``.
1006+
mean : {dpnp.ndarray, usm_ndarray}, optional
1007+
Provide the mean to prevent its recalculation. The mean should have
1008+
a shape as if it was calculated with ``keepdims=True``.
1009+
The axis for the calculation of the mean should be the same as used in
1010+
the call to this `nanstd` function.
1011+
Default: ``None``.
9971012
9981013
Returns
9991014
-------
10001015
out : dpnp.ndarray
1001-
An array containing the standard deviations. If the standard
1002-
deviation was computed over the entire array, a zero-dimensional
1003-
array is returned. If `ddof` is >= the number of non-NaN elements
1004-
in a slice or the slice contains only NaNs, then the result for
1005-
that slice is NaN.
1016+
An array containing the standard deviations. If the standard deviation
1017+
was computed over the entire array, a zero-dimensional array is
1018+
returned. If `ddof` is >= the number of non-NaN elements in a slice or
1019+
the slice contains only NaNs, then the result for that slice is NaN.
10061020
10071021
Limitations
10081022
-----------
@@ -1011,6 +1025,19 @@ def nanstd(
10111025
10121026
Notes
10131027
-----
1028+
The standard deviation is the square root of the average of the squared
1029+
deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.
1030+
1031+
The average squared deviation is normally calculated as ``x.sum() / N``,
1032+
where ``N = len(x)``. If, however, `ddof` is specified, the divisor
1033+
``N - ddof`` is used instead. In standard statistical practice, ``ddof=1``
1034+
provides an unbiased estimator of the variance of the infinite population.
1035+
``ddof=0`` provides a maximum likelihood estimate of the variance for
1036+
normally distributed variables.
1037+
The standard deviation computed in this function is the square root of
1038+
the estimated variance, so even with ``ddof=1``, it will not be an unbiased
1039+
estimate of the standard deviation per se.
1040+
10141041
Note that, for complex numbers, the absolute value is taken before
10151042
squaring, so that the result is always real and non-negative.
10161043
@@ -1029,11 +1056,18 @@ def nanstd(
10291056
>>> import dpnp as np
10301057
>>> a = np.array([[1, np.nan], [3, 4]])
10311058
>>> np.nanstd(a)
1032-
array(1.247219128924647)
1059+
array(1.24721913)
10331060
>>> np.nanstd(a, axis=0)
1034-
array([1., 0.])
1061+
array([1., 0.])
10351062
>>> np.nanstd(a, axis=1)
1036-
array([0., 0.5]) # may vary
1063+
array([0. , 0.5]) # may vary
1064+
1065+
Using the mean keyword to save computation time:
1066+
1067+
>>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]])
1068+
>>> mean = np.nanmean(a, axis=1, keepdims=True)
1069+
>>> np.nanstd(a, axis=1, mean=mean)
1070+
array([2.49443826, 1.47901995, 4.0824829 ])
10371071
10381072
"""
10391073

@@ -1051,13 +1085,21 @@ def nanstd(
10511085
ddof=ddof,
10521086
keepdims=keepdims,
10531087
where=where,
1088+
mean=mean,
10541089
)
1055-
dpnp.sqrt(res, out=res)
1056-
return res
1090+
return dpnp.sqrt(res, out=res)
10571091

10581092

10591093
def nanvar(
1060-
a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True
1094+
a,
1095+
axis=None,
1096+
dtype=None,
1097+
out=None,
1098+
ddof=0,
1099+
keepdims=False,
1100+
*,
1101+
where=True,
1102+
mean=None,
10611103
):
10621104
"""
10631105
Compute the variance along the specified axis, while ignoring NaNs.
@@ -1069,39 +1111,46 @@ def nanvar(
10691111
a : {dpnp.ndarray, usm_ndarray}
10701112
Input array.
10711113
axis : {None, int, tuple of ints}, optional
1072-
axis or axes along which the variances must be computed. If a tuple
1114+
Axis or axes along which the variances must be computed. If a tuple
10731115
of unique integers is given, the variances are computed over multiple
10741116
axes. If ``None``, the variance is computed over the entire array.
10751117
Default: ``None``.
10761118
dtype : {None, dtype}, optional
10771119
Type to use in computing the variance. By default, if `a` has a
10781120
floating-point data type, the returned array will have
1079-
the same data type as `a`.
1080-
If `a` has a boolean or integral data type, the returned array
1081-
will have the default floating point data type for the device
1082-
where input array `a` is allocated.
1121+
the same data type as `a`. If `a` has a boolean or integral data type,
1122+
the returned array will have the default floating point data type for
1123+
the device where input array `a` is allocated.
1124+
Default: ``None``.
10831125
out : {None, dpnp.ndarray, usm_ndarray}, optional
10841126
Alternative output array in which to place the result. It must have
10851127
the same shape as the expected output but the type (of the calculated
10861128
values) will be cast if necessary.
1129+
Default: ``None``.
10871130
ddof : {int, float}, optional
1088-
Means Delta Degrees of Freedom. The divisor used in calculations
1089-
is ``N - ddof``, where ``N`` represents the number of non-NaN elements.
1090-
Default: `0.0`.
1131+
Means Delta Degrees of Freedom. The divisor used in calculations is
1132+
``N - ddof``, where ``N`` represents the number of non-NaN elements.
1133+
Default: ``0.0``.
10911134
keepdims : {None, bool}, optional
10921135
If ``True``, the reduced axes (dimensions) are included in the result
1093-
as singleton dimensions, so that the returned array remains
1094-
compatible with the input array according to Array Broadcasting
1095-
rules. Otherwise, if ``False``, the reduced axes are not included in
1096-
the returned array. Default: ``False``.
1136+
as singleton dimensions, so that the returned array remains compatible
1137+
with the input array according to Array Broadcasting rules. Otherwise,
1138+
if ``False``, the reduced axes are not included in the returned array.
1139+
Default: ``False``.
1140+
mean : {dpnp.ndarray, usm_ndarray}, optional
1141+
Provide the mean to prevent its recalculation. The mean should have
1142+
a shape as if it was calculated with ``keepdims=True``.
1143+
The axis for the calculation of the mean should be the same as used in
1144+
the call to this `nanvar` function.
1145+
Default: ``None``.
10971146
10981147
Returns
10991148
-------
11001149
out : dpnp.ndarray
1101-
An array containing the variances. If the variance was computed
1102-
over the entire array, a zero-dimensional array is returned.
1103-
If `ddof` is >= the number of non-NaN elements in a slice or the
1104-
slice contains only NaNs, then the result for that slice is NaN.
1150+
An array containing the variances. If the variance was computed over
1151+
the entire array, a zero-dimensional array is returned. If `ddof` is >=
1152+
the number of non-NaN elements in a slice or the slice contains only
1153+
NaNs, then the result for that slice is NaN.
11051154
11061155
Limitations
11071156
-----------
@@ -1110,6 +1159,16 @@ def nanvar(
11101159
11111160
Notes
11121161
-----
1162+
The variance is the average of the squared deviations from the mean,
1163+
that is ``var = mean(abs(x - x.mean())**2)``.
1164+
1165+
The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
1166+
If, however, `ddof` is specified, the divisor ``N - ddof`` is used instead.
1167+
In standard statistical practice, ``ddof=1`` provides an unbiased estimator
1168+
of the variance of a hypothetical infinite population. ``ddof=0`` provides
1169+
a maximum likelihood estimate of the variance for normally distributed
1170+
variables.
1171+
11131172
Note that, for complex numbers, the absolute value is taken before squaring,
11141173
so that the result is always real and non-negative.
11151174
@@ -1127,11 +1186,18 @@ def nanvar(
11271186
>>> import dpnp as np
11281187
>>> a = np.array([[1, np.nan], [3, 4]])
11291188
>>> np.nanvar(a)
1130-
array(1.5555555555555554)
1189+
array(1.55555556)
11311190
>>> np.nanvar(a, axis=0)
1132-
array([1., 0.])
1191+
array([1., 0.])
11331192
>>> np.nanvar(a, axis=1)
1134-
array([0., 0.25]) # may vary
1193+
array([0. , 0.25]) # may vary
1194+
1195+
Using the mean keyword to save computation time:
1196+
1197+
>>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]])
1198+
>>> mean = np.nanmean(a, axis=1, keepdims=True)
1199+
>>> np.nanvar(a, axis=1, mean=mean)
1200+
array([ 6.22222222, 2.1875 , 16.66666667])
11351201
11361202
"""
11371203

@@ -1157,46 +1223,51 @@ def nanvar(
11571223
dtype = dpnp.dtype(dtype)
11581224
if not dpnp.issubdtype(dtype, dpnp.inexact):
11591225
raise TypeError("If input is inexact, then dtype must be inexact.")
1226+
11601227
if out is not None:
11611228
dpnp.check_supported_arrays_type(out)
11621229
if not dpnp.issubdtype(out.dtype, dpnp.inexact):
11631230
raise TypeError("If input is inexact, then out must be inexact.")
11641231

11651232
# Compute mean
1166-
var_dtype = a.real.dtype if dtype is None else dtype
11671233
cnt = dpnp.sum(
1168-
~mask, axis=axis, dtype=var_dtype, keepdims=True, where=where
1234+
~mask, axis=axis, dtype=dpnp.intp, keepdims=True, where=where
11691235
)
1170-
avg = dpnp.sum(arr, axis=axis, dtype=dtype, keepdims=True, where=where)
1171-
avg = dpnp.divide(avg, cnt, out=avg)
11721236

1173-
# Compute squared deviation from mean.
1237+
if mean is not None:
1238+
avg = mean
1239+
else:
1240+
avg = dpnp.sum(arr, axis=axis, dtype=dtype, keepdims=True, where=where)
1241+
avg = dpnp.divide(avg, cnt, out=avg)
1242+
1243+
# Compute squared deviation from mean
11741244
if arr.dtype == avg.dtype:
11751245
arr = dpnp.subtract(arr, avg, out=arr)
11761246
else:
11771247
arr = dpnp.subtract(arr, avg)
11781248
dpnp.copyto(arr, 0.0, where=mask)
1249+
11791250
if dpnp.issubdtype(arr.dtype, dpnp.complexfloating):
11801251
sqr = dpnp.multiply(arr, arr.conj(), out=arr).real
11811252
else:
1182-
sqr = dpnp.multiply(arr, arr, out=arr)
1253+
sqr = dpnp.square(arr, out=arr)
11831254

11841255
# Compute variance
11851256
var = dpnp.sum(
11861257
sqr,
11871258
axis=axis,
1188-
dtype=var_dtype,
1259+
dtype=dtype,
11891260
out=out,
11901261
keepdims=keepdims,
11911262
where=where,
11921263
)
11931264

11941265
if var.ndim < cnt.ndim:
11951266
cnt = cnt.squeeze(axis)
1196-
cnt -= ddof
1197-
dpnp.divide(var, cnt, out=var)
1267+
dof = cnt - ddof
1268+
dpnp.divide(var, dof, out=var)
11981269

1199-
isbad = cnt <= 0
1270+
isbad = dof <= 0
12001271
if dpnp.any(isbad):
12011272
# NaN, inf, or negative numbers are all possible bad
12021273
# values, so explicitly replace them with NaN.

0 commit comments

Comments
 (0)