Skip to content

Commit f688e94

Browse files
Merge 9fb1e59 into 5649b06
2 parents 5649b06 + 9fb1e59 commit f688e94

File tree

6 files changed

+536
-49
lines changed

6 files changed

+536
-49
lines changed

dpnp/dpnp_iface_histograms.py

Lines changed: 197 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,25 @@
4242

4343
import dpctl.utils as dpu
4444
import numpy
45-
from dpctl.tensor._type_utils import _can_cast
4645

4746
import dpnp
4847

4948
# pylint: disable=no-name-in-module
5049
import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext
50+
from dpnp.dpnp_utils.dpnp_utils_common import (
51+
result_type_for_device,
52+
to_supported_dtypes,
53+
)
5154

5255
# pylint: disable=no-name-in-module
53-
from .dpnp_utils import get_usm_allocations, map_dtype_to_device
56+
from .dpnp_utils import get_usm_allocations
5457

5558
__all__ = [
5659
"bincount",
5760
"digitize",
5861
"histogram",
5962
"histogram_bin_edges",
63+
"histogram2d",
6064
"histogramdd",
6165
]
6266

@@ -65,33 +69,15 @@
6569
_range = range
6670

6771

68-
def _result_type_for_device(dtypes, device):
69-
rt = dpnp.result_type(*dtypes)
70-
return map_dtype_to_device(rt, device)
71-
72-
7372
def _align_dtypes(a_dtype, bins_dtype, ntype, supported_types, device):
74-
has_fp64 = device.has_aspect_fp64
75-
has_fp16 = device.has_aspect_fp16
76-
77-
a_bin_dtype = _result_type_for_device([a_dtype, bins_dtype], device)
73+
a_bin_dtype = result_type_for_device([a_dtype, bins_dtype], device)
7874

7975
# histogram implementation doesn't support uint64 as histogram type
8076
# we can use int64 instead. Result would be correct even in case of overflow
8177
if ntype == numpy.uint64:
8278
ntype = dpnp.int64
8379

84-
if (a_bin_dtype, ntype) in supported_types:
85-
return a_bin_dtype, ntype
86-
87-
for sample_type, hist_type in supported_types:
88-
if _can_cast(
89-
a_bin_dtype, sample_type, has_fp16, has_fp64
90-
) and _can_cast(ntype, hist_type, has_fp16, has_fp64):
91-
return sample_type, hist_type
92-
93-
# should not happen
94-
return None, None
80+
return to_supported_dtypes([a_bin_dtype, ntype], supported_types, device)
9581

9682

9783
def _ravel_check_a_and_weights(a, weights):
@@ -138,6 +124,9 @@ def _is_finite(a):
138124
return numpy.isfinite(a)
139125

140126
if range is not None:
127+
if len(range) != 2:
128+
raise ValueError("range argument must consist of 2 elements.")
129+
141130
first_edge, last_edge = range
142131
if first_edge > last_edge:
143132
raise ValueError("max must be larger than min in range parameter.")
@@ -520,6 +509,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
520509
If `bins` is a sequence, it defines a monotonically increasing array
521510
of bin edges, including the rightmost edge, allowing for non-uniform
522511
bin widths.
512+
523513
Default: ``10``.
524514
range : {None, 2-tuple of float}, optional
525515
The lower and upper range of the bins. If not provided, range is simply
@@ -528,6 +518,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
528518
affects the automatic bin computation as well. While bin width is
529519
computed to be optimal based on the actual data within `range`, the bin
530520
count will fill the entire range including portions containing no data.
521+
531522
Default: ``None``.
532523
density : {None, bool}, optional
533524
If ``False`` or ``None``, the result will contain the number of samples
@@ -536,6 +527,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
536527
the range is ``1``. Note that the sum of the histogram values will not
537528
be equal to ``1`` unless bins of unity width are chosen; it is not
538529
a probability *mass* function.
530+
539531
Default: ``None``.
540532
weights : {None, dpnp.ndarray, usm_ndarray}, optional
541533
An array of weights, of the same shape as `a`. Each value in `a` only
@@ -545,6 +537,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
545537
Please note that the ``dtype`` of `weights` will also become the
546538
``dtype`` of the returned accumulator (`hist`), so it must be large
547539
enough to hold accumulated values as well.
540+
548541
Default: ``None``.
549542
550543
Returns
@@ -751,6 +744,166 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
751744
return bin_edges
752745

753746

747+
def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
    """
    Compute the bi-dimensional histogram of two data samples.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the `x` coordinates of the points to be
        histogrammed.
    y : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the `y` coordinates of the points to be
        histogrammed.
    bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \
            [int, array], [array, int]}, optional

        The bins specification:

        * If int, the number of bins for the two dimensions (nx=ny=bins).
        * If array, the bin edges for the two dimensions
          (x_edges=y_edges=bins).
        * If [int, int], the number of bins in each dimension
          (nx, ny = bins).
        * If [array, array], the bin edges in each dimension
          (x_edges, y_edges = bins).
        * A combination [int, array] or [array, int], where int
          is the number of bins and array is the bin edges.

        Default: ``10``.
    range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional
        The leftmost and rightmost edges of the bins along each dimension.
        If ``None`` the ranges are
        ``[[x.min(), x.max()], [y.min(), y.max()]]``. All values outside
        of this range will be considered outliers and not tallied in the
        histogram.

        Default: ``None``.
    density : {None, bool}, optional
        If ``False`` or ``None``, the default, returns the number of
        samples in each bin.
        If ``True``, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_volume``.

        Default: ``None``.
    weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional
        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
        Weights are normalized to ``1`` if `density` is ``True``.
        If `density` is ``False``, the values of the returned histogram
        are equal to the sum of the weights belonging to the samples
        falling into each bin.
        If ``None`` all samples are assigned a weight of ``1``.

        Default: ``None``.

    Returns
    -------
    H : dpnp.ndarray of shape (nx, ny)
        The bi-dimensional histogram of samples `x` and `y`. Values in `x`
        are histogrammed along the first dimension and values in `y` are
        histogrammed along the second dimension.
    xedges : dpnp.ndarray of shape (nx+1,)
        The bin edges along the first dimension.
    yedges : dpnp.ndarray of shape (ny+1,)
        The bin edges along the second dimension.

    See Also
    --------
    :obj:`dpnp.histogram` : 1D histogram
    :obj:`dpnp.histogramdd` : Multidimensional histogram

    Notes
    -----
    When `density` is ``True``, then the returned histogram is the sample
    density, defined such that the sum over bins of the product
    ``bin_value * bin_area`` is 1.

    Please note that the histogram does not follow the Cartesian convention
    where `x` values are on the abscissa and `y` values on the ordinate
    axis. Rather, `x` is histogrammed along the first dimension of the
    array (vertical), and `y` along the second dimension of the array
    (horizontal). This ensures compatibility with `histogramdd`.

    Examples
    --------
    >>> import dpnp as np
    >>> x = np.random.randn(20).astype("float32")
    >>> y = np.random.randn(20).astype("float32")
    >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
    >>> hist.shape
    (4, 3)
    >>> hist
    array([[1., 2., 0.],
           [0., 3., 1.],
           [1., 4., 1.],
           [1., 3., 3.]], dtype=float32)
    >>> edges_x.shape
    (5,)
    >>> edges_x
    array([-1.7516936 , -0.96109843, -0.17050326,  0.62009203,  1.4106871 ],
          dtype=float32)
    >>> edges_y.shape
    (4,)
    >>> edges_y
    array([-2.6604428 , -0.94615364,  0.76813555,  2.4824247 ], dtype=float32)

    Please note that the resulting values of the histogram and edges will
    differ from run to run, because `x` and `y` are randomly generated.

    """

    dpnp.check_supported_arrays_type(x, y)
    if weights is not None:
        dpnp.check_supported_arrays_type(weights)

    if x.ndim != 1 or y.ndim != 1:
        raise ValueError(
            "x and y must be 1-dimensional arrays. "
            f"Got {x.ndim} and {y.ndim} respectively"
        )

    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length. "
            f"Got {len(x)} and {len(y)} respectively"
        )

    usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
    device = exec_q.sycl_device

    sample_dtype = result_type_for_device([x.dtype, y.dtype], device)

    # Unlike histogramdd, histogram2d accepts a single 1-d `bins` array and
    # applies it to both dimensions. A two-element `bins` is always read as
    # a per-dimension specification, so only an iterable with more than two
    # elements is duplicated here; any other value is passed unchanged to
    # the normalization step below.
    if isinstance(bins, Iterable) and len(bins) > 2:
        bins = [bins] * 2

    bins = _histdd_normalize_bins(bins, 2)

    # Array-like bin edges take part in choosing the sample dtype;
    # entries without a `dtype` attribute are left out.
    bins_dtypes = [sample_dtype]
    bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]

    bins_dtype = result_type_for_device(bins_dtypes, device)
    hist_dtype = _histdd_hist_dtype(exec_q, weights)

    supported_types = statistics_ext.histogramdd_dtypes()

    # Only the aligned sample dtype is needed here; the histogram dtype is
    # discarded.
    sample_dtype, _ = _align_dtypes(
        sample_dtype, bins_dtype, hist_dtype, supported_types, device
    )

    # Pack x and y as the two columns of an (N, 2) sample and delegate to
    # histogramdd.
    sample = dpnp.empty_like(
        x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
    )
    sample[:, 0] = x
    sample[:, 1] = y

    hist, edges = histogramdd(
        sample, bins=bins, range=range, density=density, weights=weights
    )
    return hist, edges[0], edges[1]
905+
906+
754907
def _histdd_validate_bins(bins):
755908
for i, b in enumerate(bins):
756909
if numpy.ndim(b) == 0:
@@ -873,9 +1026,7 @@ def _histdd_hist_dtype(queue, weights):
8731026
# hist_dtype is either float or complex, so it is ok
8741027
# to calculate it as result type between default_float and
8751028
# weights.dtype
876-
hist_dtype = _result_type_for_device(
877-
[hist_dtype, weights.dtype], device
878-
)
1029+
hist_dtype = result_type_for_device([hist_dtype, weights.dtype], device)
8791030

8801031
return hist_dtype
8811032

@@ -886,7 +1037,7 @@ def _histdd_sample_dtype(queue, sample, bin_edges_list):
8861037
dtypes_ = [bin_edges.dtype for bin_edges in bin_edges_list]
8871038
dtypes_.append(sample.dtype)
8881039

889-
return _result_type_for_device(dtypes_, device)
1040+
return result_type_for_device(dtypes_, device)
8901041

8911042

8921043
def _histdd_supported_dtypes(sample, bin_edges_list, weights):
@@ -918,7 +1069,7 @@ def _histdd_extract_arrays(sample, weights, bins):
9181069
return all_arrays
9191070

9201071

921-
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
1072+
def histogramdd(sample, bins=10, range=None, density=None, weights=None):
9221073
"""
9231074
Compute the multidimensional histogram of some data.
9241075
@@ -936,30 +1087,33 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
9361087
* The number of bins for each dimension (nx, ny, ... =bins)
9371088
* The number of bins for all dimensions (nx=ny=...=bins).
9381089
939-
Default: ``10``
1090+
Default: ``10``.
9401091
range : {None, sequence}, optional
9411092
A sequence of length D, each an optional (lower, upper) tuple giving
9421093
the outer bin edges to be used if the edges are not given explicitly in
9431094
`bins`.
944-
An entry of None in the sequence results in the minimum and maximum
1095+
An entry of ``None`` in the sequence results in the minimum and maximum
9451096
values being used for the corresponding dimension.
946-
None is equivalent to passing a tuple of D None values.
947-
948-
Default: ``None``
949-
weights : {dpnp.ndarray, usm_ndarray}, optional
950-
An (N,)-shaped array of values `w_i` weighing each sample
951-
`(x_i, y_i, z_i, ...)`.
952-
Weights are normalized to 1 if density is True. If density is False,
953-
the values of the returned histogram are equal to the sum of the
954-
weights belonging to the samples falling into each bin.
1097+
``None`` is equivalent to passing a tuple of D ``None`` values.
9551098
956-
Default: ``None``
957-
density : bool, optional
958-
If ``False``, the default, returns the number of samples in each bin.
1099+
Default: ``None``.
1100+
density : {None, bool}, optional
1101+
If ``False`` or ``None``, the default, returns the number of
1102+
samples in each bin.
9591103
If ``True``, returns the probability *density* function at the bin,
9601104
``bin_count / sample_count / bin_volume``.
9611105
962-
Default: ``False``
1106+
Default: ``None``.
1107+
weights : {None, dpnp.ndarray, usm_ndarray}, optional
1108+
An (N,)-shaped array of values `w_i` weighing each sample
1109+
`(x_i, y_i, z_i, ...)`.
1110+
Weights are normalized to ``1`` if `density` is ``True``.
1111+
If `density` is ``False``, the values of the returned histogram
1112+
are equal to the sum of the weights belonging to the samples
1113+
falling into each bin.
1114+
If ``None`` all samples are assigned a weight of ``1``.
1115+
1116+
Default: ``None``.
9631117
9641118
Returns
9651119
-------
@@ -993,7 +1147,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
9931147
elif sample.ndim > 2:
9941148
raise ValueError("sample must have no more than 2 dimensions")
9951149

996-
ndim = sample.shape[1] if sample.size > 0 else 1
1150+
ndim = sample.shape[1]
9971151

9981152
_arrays = _histdd_extract_arrays(sample, weights, bins)
9991153
usm_type, queue = get_usm_allocations(_arrays)

dpnp/dpnp_utils/dpnp_utils_common.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,19 @@ def to_supported_dtypes(dtypes, supported_types, device):
5454
def is_castable(dtype, stype):
5555
return _can_cast(dtype, stype, has_fp16, has_fp64)
5656

57+
if not isinstance(supported_types, Iterable):
58+
supported_types = (supported_types,)
59+
60+
if isinstance(dtypes, Iterable):
61+
sdtypes_elem = supported_types[0]
62+
if not isinstance(sdtypes_elem, Iterable):
63+
raise ValueError(
64+
"Input and supported types must have the same length"
65+
)
66+
67+
typ = type(sdtypes_elem)
68+
dtypes = typ(dtypes)
69+
5770
if dtypes in supported_types:
5871
return dtypes
5972

@@ -78,4 +91,7 @@ def is_castable(dtype, stype):
7891
):
7992
return stypes
8093

81-
return None
94+
if not isinstance(dtypes, Iterable):
95+
return None
96+
97+
return (None,) * len(dtypes)

0 commit comments

Comments
 (0)