Skip to content

Commit 3b3a233

Browse files
initial
1 parent 498e705 commit 3b3a233

File tree

1 file changed

+162
-0
lines changed

1 file changed

+162
-0
lines changed

dpnp/dpnp_iface_histograms.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
"digitize",
5858
"histogram",
5959
"histogram_bin_edges",
"histogram2d",
6061
"histogramdd",
6162
]
6263

@@ -751,6 +752,167 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
751752
return bin_edges
752753

753754

755+
def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
    """
    Compute the bi-dimensional histogram of two data samples.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the x coordinates of the points to be
        histogrammed.
    y : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the y coordinates of the points to be
        histogrammed.
    bins : {int, list of dpnp.ndarray, list of usm_ndarray, \
sequence of scalars}, optional
        The bin specification:

        * If int, the number of bins for the two dimensions (nx=ny=bins).
        * If array, the bin edges for the two dimensions
          (x_edges=y_edges=bins).
        * If [int, int], the number of bins in each dimension
          (nx, ny = bins).
        * If [array, array], the bin edges in each dimension
          (x_edges, y_edges = bins).
        * A combination [int, array] or [array, int], where int
          is the number of bins and array is the bin edges.

    range : {dpnp.ndarray, usm_ndarray} of shape (2,2), optional
        The leftmost and rightmost edges of the bins along each dimension
        (if not specified explicitly in the `bins` parameters):
        ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range
        will be considered outliers and not tallied in the histogram.
    density : bool, optional
        If ``False``, the default, returns the number of samples in each bin.
        If ``True``, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_area``.
    weights : {dpnp.ndarray, usm_ndarray} of shape(N,), optional
        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
        Weights are normalized to 1 if `density` is True. If `density` is
        False, the values of the returned histogram are equal to the sum of
        the weights belonging to the samples falling into each bin.

    Returns
    -------
    H : dpnp.ndarray, shape(nx, ny)
        The bi-dimensional histogram of samples `x` and `y`. Values in `x`
        are histogrammed along the first dimension and values in `y` are
        histogrammed along the second dimension.
    xedges : dpnp.ndarray, shape(nx+1,)
        The bin edges along the first dimension.
    yedges : dpnp.ndarray, shape(ny+1,)
        The bin edges along the second dimension.

    Raises
    ------
    ValueError
        If `x` and `y` do not have the same length.

    See Also
    --------
    :obj:`dpnp.histogram` : 1D histogram
    :obj:`dpnp.histogramdd` : Multidimensional histogram

    Notes
    -----
    When `density` is True, then the returned histogram is the sample
    density, defined such that the sum over bins of the product
    ``bin_value * bin_area`` is 1.

    Please note that the histogram does not follow the Cartesian convention
    where `x` values are on the abscissa and `y` values on the ordinate
    axis. Rather, `x` is histogrammed along the first dimension of the
    array (vertical), and `y` along the second dimension of the array
    (horizontal). This ensures compatibility with `histogramdd`.

    Examples
    --------
    >>> import dpnp as np
    >>> x = np.random.randn(20).astype("float32")
    >>> y = np.random.randn(20).astype("float32")
    >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
    >>> hist.shape
    (4, 3)
    >>> edges_x.shape
    (5,)
    >>> edges_y.shape
    (4,)

    """

    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length. "
            f"Got {len(x)} and {len(y)} respectively"
        )

    # `histogramdd` reads a sequence of `bins` as one specification per
    # dimension. `histogram2d` additionally accepts a single array of edges
    # shared by both dimensions, so anything that is neither a scalar nor a
    # one/two-element specification is duplicated here.
    try:
        bins_spec_len = len(bins)
    except TypeError:
        # Scalars (e.g. the default ``bins=10``) have no length.
        bins_spec_len = 1

    if bins_spec_len not in (1, 2):
        bins = [bins, bins]

    hist, edges = histogramdd([x, y], bins, range, density, weights)
    return hist, edges[0], edges[1]
914+
915+
754916
def _histdd_validate_bins(bins):
755917
for i, b in enumerate(bins):
756918
if numpy.ndim(b) == 0:

0 commit comments

Comments
 (0)