Skip to content

Commit 541e7d9

Browse files
Implementation of histogram2d
1 parent 3b3a233 commit 541e7d9

File tree

5 files changed

+352
-92
lines changed

5 files changed

+352
-92
lines changed

dpnp/dpnp_iface_histograms.py

Lines changed: 67 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
"digitize",
5858
"histogram",
5959
"histogram_bin_edges",
60-
"histogram2d"
60+
"histogram2d",
6161
"histogramdd",
6262
]
6363

@@ -753,6 +753,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
753753

754754

755755
def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
756+
# pylint: disable=line-too-long
756757
"""
757758
Compute the bi-dimensional histogram of two data samples.
758759
@@ -764,7 +765,7 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
764765
y : {dpnp.ndarray, usm_ndarray} of shape (N,)
765766
An array containing the y coordinates of the points to be
766767
histogrammed.
767-
bins : {int, list of dpnp.ndarray, list of usm_ndarray, sequence of scalars}, optional
768+
bins : {int, list of dpnp.ndarray or usm_ndarray, sequence of scalars}, optional
768769
The bin specification:
769770
770771
* If int, the number of bins for the two dimensions (nx=ny=bins).
@@ -822,94 +823,73 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
822823
823824
Examples
824825
--------
825-
>>> import numpy as np
826-
>>> from matplotlib.image import NonUniformImage
827-
>>> import matplotlib.pyplot as plt
828-
829-
Construct a 2-D histogram with variable bin width. First define the bin
830-
edges:
831-
832-
>>> xedges = [0, 1, 3, 5]
833-
>>> yedges = [0, 2, 3, 4, 6]
834-
835-
Next we create a histogram H with random bin content:
836-
837-
>>> x = np.random.normal(2, 1, 100)
838-
>>> y = np.random.normal(1, 1, 100)
839-
>>> H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges))
840-
>>> # Histogram does not follow Cartesian convention (see Notes),
841-
>>> # therefore transpose H for visualization purposes.
842-
>>> H = H.T
843-
844-
:func:`imshow <matplotlib.pyplot.imshow>` can only display square bins:
845-
846-
>>> fig = plt.figure(figsize=(7, 3))
847-
>>> ax = fig.add_subplot(131, title='imshow: square bins')
848-
>>> plt.imshow(H, interpolation='nearest', origin='lower',
849-
... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
850-
<matplotlib.image.AxesImage object at 0x...>
851-
852-
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>` can display actual edges:
853-
854-
>>> ax = fig.add_subplot(132, title='pcolormesh: actual edges',
855-
... aspect='equal')
856-
>>> X, Y = np.meshgrid(xedges, yedges)
857-
>>> ax.pcolormesh(X, Y, H)
858-
<matplotlib.collections.QuadMesh object at 0x...>
859-
860-
:class:`NonUniformImage <matplotlib.image.NonUniformImage>` can be used to
861-
display actual bin edges with interpolation:
862-
863-
>>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated',
864-
... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]])
865-
>>> im = NonUniformImage(ax, interpolation='bilinear')
866-
>>> xcenters = (xedges[:-1] + xedges[1:]) / 2
867-
>>> ycenters = (yedges[:-1] + yedges[1:]) / 2
868-
>>> im.set_data(xcenters, ycenters, H)
869-
>>> ax.add_image(im)
870-
>>> plt.show()
871-
872-
It is also possible to construct a 2-D histogram without specifying bin
873-
edges:
874-
875-
>>> # Generate non-symmetric test data
876-
>>> n = 10000
877-
>>> x = np.linspace(1, 100, n)
878-
>>> y = 2*np.log(x) + np.random.rand(n) - 0.5
879-
>>> # Compute 2d histogram. Note the order of x/y and xedges/yedges
880-
>>> H, yedges, xedges = np.histogram2d(y, x, bins=20)
881-
882-
Now we can plot the histogram using
883-
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`, and a
884-
:func:`hexbin <matplotlib.pyplot.hexbin>` for comparison.
885-
886-
>>> # Plot histogram using pcolormesh
887-
>>> fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True)
888-
>>> ax1.pcolormesh(xedges, yedges, H, cmap='rainbow')
889-
>>> ax1.plot(x, 2*np.log(x), 'k-')
890-
>>> ax1.set_xlim(x.min(), x.max())
891-
>>> ax1.set_ylim(y.min(), y.max())
892-
>>> ax1.set_xlabel('x')
893-
>>> ax1.set_ylabel('y')
894-
>>> ax1.set_title('histogram2d')
895-
>>> ax1.grid()
896-
897-
>>> # Create hexbin plot for comparison
898-
>>> ax2.hexbin(x, y, gridsize=20, cmap='rainbow')
899-
>>> ax2.plot(x, 2*np.log(x), 'k-')
900-
>>> ax2.set_title('hexbin')
901-
>>> ax2.set_xlim(x.min(), x.max())
902-
>>> ax2.set_xlabel('x')
903-
>>> ax2.grid()
904-
905-
>>> plt.show()
826+
>>> import dpnp as np
827+
>>> x = np.random.randn(20)
828+
>>> y = np.random.randn(20)
829+
>>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
830+
>>> hist
831+
[[1. 0. 0.]
832+
[0. 0. 0.]
833+
[5. 6. 4.]
834+
[1. 2. 1.]]
835+
>>> edges_x
836+
[-5.6575713 -3.5574734 -1.4573755 0.6427226 2.74282 ]
837+
>>> edges_y
838+
[-1.1889046 -0.07263839 1.0436279 2.159894 ]
906839
"""
840+
# pylint: enable=line-too-long
841+
842+
dpnp.check_supported_arrays_type(x, y)
843+
if weights is not None:
844+
dpnp.check_supported_arrays_type(weights)
845+
846+
if x.ndim != 1 or y.ndim != 1:
847+
raise ValueError(
848+
f"x and y must be 1-dimensional arrays."
849+
f"Got {x.ndim} and {y.ndim} respectively"
850+
)
907851

908852
if len(x) != len(y):
909-
raise ValueError(f'x and y must have the same length. Got {len(x)} and {len(y)} respectively')
853+
raise ValueError(
854+
f"x and y must have the same length."
855+
f"Got {len(x)} and {len(y)} respectively"
856+
)
857+
858+
usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
859+
device = exec_q.sycl_device
910860

861+
sample_dtype = _result_type_for_device([x.dtype, y.dtype], device)
911862

912-
hist, edges = histogramdd([x, y], bins, range, density, weights)
863+
# Unlike histogramdd, histogram2d accepts 1-D bins and
864+
# applies them to both dimensions.
865+
# At the same time, a two-element bins should be interpreted as
866+
# the number of bins in each dimension, and array-like bins with one
867+
# element are not allowed.
868+
if isinstance(bins, Iterable) and len(bins) > 2:
869+
bins = [bins] * 2
870+
871+
bins = _histdd_normalize_bins(bins, 2)
872+
bins_dtypes = [sample_dtype]
873+
bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]
874+
875+
bins_dtype = _result_type_for_device(bins_dtypes, device)
876+
hist_dtype = _histdd_hist_dtype(exec_q, weights)
877+
878+
supported_types = statistics_ext.histogramdd_dtypes()
879+
880+
sample_dtype, _ = _align_dtypes(
881+
sample_dtype, bins_dtype, hist_dtype, supported_types, device
882+
)
883+
884+
sample = dpnp.empty_like(
885+
x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
886+
)
887+
sample[:, 0] = x
888+
sample[:, 1] = y
889+
890+
hist, edges = histogramdd(
891+
sample, bins=bins, range=range, density=density, weights=weights
892+
)
913893
return hist, edges[0], edges[1]
914894

915895

@@ -1080,7 +1060,7 @@ def _histdd_extract_arrays(sample, weights, bins):
10801060
return all_arrays
10811061

10821062

1083-
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
1063+
def histogramdd(sample, bins=10, range=None, density=False, weights=None):
10841064
"""
10851065
Compute the multidimensional histogram of some data.
10861066
@@ -1155,7 +1135,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
11551135
elif sample.ndim > 2:
11561136
raise ValueError("sample must have no more than 2 dimensions")
11571137

1158-
ndim = sample.shape[1] if sample.size > 0 else 1
1138+
ndim = sample.shape[1]
11591139

11601140
_arrays = _histdd_extract_arrays(sample, weights, bins)
11611141
usm_type, queue = get_usm_allocations(_arrays)

0 commit comments

Comments
 (0)