|
57 | 57 | "digitize",
|
58 | 58 | "histogram",
|
59 | 59 | "histogram_bin_edges",
|
| 60 | + "histogram2d" |
60 | 61 | "histogramdd",
|
61 | 62 | ]
|
62 | 63 |
|
@@ -751,6 +752,167 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
|
751 | 752 | return bin_edges
|
752 | 753 |
|
753 | 754 |
|
def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
    """
    Compute the bi-dimensional histogram of two data samples.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the x coordinates of the points to be
        histogrammed.
    y : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the y coordinates of the points to be
        histogrammed.
    bins : {int, dpnp.ndarray, usm_ndarray, sequence}, optional
        The bin specification:

        * If int, the number of bins for the two dimensions (nx=ny=bins).
        * If array, the bin edges for the two dimensions
          (x_edges=y_edges=bins). The array must hold more than two
          edges, because a two-element sequence is always read as one
          entry per dimension.
        * If [int, int], the number of bins in each dimension
          (nx, ny = bins).
        * If [array, array], the bin edges in each dimension
          (x_edges, y_edges = bins).
        * A combination [int, array] or [array, int], where int
          is the number of bins and array is the bin edges.

        Default: ``10``.
    range : {None, dpnp.ndarray, usm_ndarray} of shape (2, 2), optional
        The leftmost and rightmost edges of the bins along each dimension
        (if not specified explicitly in the `bins` parameters):
        ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range
        will be considered outliers and not tallied in the histogram.
        Default: ``None``.
    density : {None, bool}, optional
        If ``False``, the default, returns the number of samples in each bin.
        If ``True``, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_area``.
        The value is forwarded unchanged to :obj:`dpnp.histogramdd`.
        Default: ``None``.
    weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional
        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
        Weights are normalized to 1 if `density` is True. If `density` is
        False, the values of the returned histogram are equal to the sum of
        the weights belonging to the samples falling into each bin.
        Default: ``None``.

    Returns
    -------
    H : dpnp.ndarray, shape (nx, ny)
        The bi-dimensional histogram of samples `x` and `y`. Values in `x`
        are histogrammed along the first dimension and values in `y` are
        histogrammed along the second dimension.
    xedges : dpnp.ndarray, shape (nx+1,)
        The bin edges along the first dimension.
    yedges : dpnp.ndarray, shape (ny+1,)
        The bin edges along the second dimension.

    Raises
    ------
    ValueError
        If `x` and `y` do not have the same length.

    See Also
    --------
    :obj:`dpnp.histogram` : 1D histogram
    :obj:`dpnp.histogramdd` : Multidimensional histogram

    Notes
    -----
    When `density` is True, then the returned histogram is the sample
    density, defined such that the sum over bins of the product
    ``bin_value * bin_area`` is 1.

    Please note that the histogram does not follow the Cartesian convention
    where `x` values are on the abscissa and `y` values on the ordinate
    axis. Rather, `x` is histogrammed along the first dimension of the
    array (vertical), and `y` along the second dimension of the array
    (horizontal). This ensures compatibility with `histogramdd`.

    Examples
    --------
    >>> import dpnp as np
    >>> x = np.random.randn(20)
    >>> y = np.random.randn(20)
    >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
    >>> hist.shape
    (4, 3)
    >>> edges_x.shape
    (5,)
    >>> edges_y.shape
    (4,)

    """

    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length. "
            f"Got {len(x)} and {len(y)} respectively"
        )

    # `histogramdd` reads a sequence `bins` as one entry per dimension, while
    # `histogram2d` additionally accepts a single 1-D array of edges shared by
    # both axes. Anything that is neither a scalar (no `len`) nor a one- or
    # two-element sequence is therefore taken to be such a shared edge array.
    try:
        bins_len = len(bins)
    except TypeError:
        bins_len = 1

    if bins_len not in (1, 2):
        bins = [bins, bins]

    # NOTE(review): assumes `histogramdd` accepts a sequence of two 1-D
    # arrays as its sample argument - confirm against its implementation.
    hist, edges = histogramdd([x, y], bins, range, density, weights)
    return hist, edges[0], edges[1]
| 914 | + |
| 915 | + |
754 | 916 | def _histdd_validate_bins(bins):
|
755 | 917 | for i, b in enumerate(bins):
|
756 | 918 | if numpy.ndim(b) == 0:
|
|
0 commit comments