Skip to content

Commit ba5fb31

Browse files
Implementation of corrcoef (#2139)
* Implementation of corrcoef * Apply review comments * Fix docs --------- Co-authored-by: Anton <[email protected]>
1 parent 44c1ed3 commit ba5fb31

File tree

7 files changed

+193
-14
lines changed

7 files changed

+193
-14
lines changed

dpnp/dpnp_iface_statistics.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
"amin",
6666
"average",
6767
"bincount",
68+
"corrcoef",
6869
"correlate",
6970
"cov",
7071
"max",
@@ -360,6 +361,127 @@ def bincount(x1, weights=None, minlength=0):
360361
return call_origin(numpy.bincount, x1, weights=weights, minlength=minlength)
361362

362363

364+
def corrcoef(x, y=None, rowvar=True, *, dtype=None):
365+
"""
366+
Return Pearson product-moment correlation coefficients.
367+
368+
For full documentation refer to :obj:`numpy.corrcoef`.
369+
370+
Parameters
371+
----------
372+
x : {dpnp.ndarray, usm_ndarray}
373+
A 1-D or 2-D array containing multiple variables and observations.
374+
Each row of `x` represents a variable, and each column a single
375+
observation of all those variables. Also see `rowvar` below.
376+
y : {None, dpnp.ndarray, usm_ndarray}, optional
377+
An additional set of variables and observations. `y` has the same
378+
shape as `x`.
379+
Default: ``None``.
380+
rowvar : {bool}, optional
381+
If `rowvar` is ``True``, then each row represents a variable,
382+
with observations in the columns. Otherwise, the relationship
383+
is transposed: each column represents a variable, while the rows
384+
contain observations.
385+
Default: ``True``.
386+
dtype : {None, dtype}, optional
387+
Data-type of the result.
388+
Default: ``None``.
389+
390+
Returns
391+
-------
392+
R : {dpnp.ndarray}
393+
The correlation coefficient matrix of the variables.
394+
395+
See Also
396+
--------
397+
:obj:`dpnp.cov` : Covariance matrix.
398+
399+
Examples
400+
--------
401+
In this example we generate two random arrays, ``xarr`` and ``yarr``, and
402+
compute the row-wise and column-wise Pearson correlation coefficients,
403+
``R``. Since `rowvar` is true by default, we first find the row-wise
404+
Pearson correlation coefficients between the variables of ``xarr``.
405+
406+
>>> import dpnp as np
407+
>>> np.random.seed(123)
408+
>>> xarr = np.random.rand(3, 3).astype(np.float32)
409+
>>> xarr
410+
array([[7.2858386e-17, 2.2066992e-02, 3.9520904e-01],
411+
[4.8012391e-01, 5.9377134e-01, 4.5147297e-01],
412+
[9.0728188e-01, 9.9387854e-01, 5.8399546e-01]], dtype=float32)
413+
>>> R1 = np.corrcoef(xarr)
414+
>>> R1
415+
array([[ 0.99999994, -0.6173796 , -0.9685411 ],
416+
[-0.6173796 , 1. , 0.7937219 ],
417+
[-0.9685411 , 0.7937219 , 0.9999999 ]], dtype=float32)
418+
419+
If we add another set of variables and observations ``yarr``, we can
420+
compute the row-wise Pearson correlation coefficients between the
421+
variables in ``xarr`` and ``yarr``.
422+
423+
>>> yarr = np.random.rand(3, 3).astype(np.float32)
424+
>>> yarr
425+
array([[0.17615308, 0.65354985, 0.15716429],
426+
[0.09373496, 0.2123185 , 0.84086883],
427+
[0.9011005 , 0.45206687, 0.00225109]], dtype=float32)
428+
>>> R2 = np.corrcoef(xarr, yarr)
429+
>>> R2
430+
array([[ 0.99999994, -0.6173796 , -0.968541 , -0.48613155, 0.9951523 ,
431+
-0.8900264 ],
432+
[-0.6173796 , 1. , 0.7937219 , 0.9875833 , -0.53702235,
433+
0.19083664],
434+
[-0.968541 , 0.7937219 , 0.9999999 , 0.6883078 , -0.9393724 ,
435+
0.74857277],
436+
[-0.48613152, 0.9875833 , 0.6883078 , 0.9999999 , -0.39783284,
437+
0.0342579 ],
438+
[ 0.9951523 , -0.53702235, -0.9393725 , -0.39783284, 0.99999994,
439+
-0.9305482 ],
440+
[-0.89002645, 0.19083665, 0.7485727 , 0.0342579 , -0.9305482 ,
441+
1. ]], dtype=float32)
442+
443+
Finally if we use the option ``rowvar=False``, the columns are now
444+
being treated as the variables and we will find the column-wise Pearson
445+
correlation coefficients between variables in ``xarr`` and ``yarr``.
446+
447+
>>> R3 = np.corrcoef(xarr, yarr, rowvar=False)
448+
>>> R3
449+
array([[ 1. , 0.9724453 , -0.9909503 , 0.8104691 , -0.46436927,
450+
-0.1643624 ],
451+
[ 0.9724453 , 1. , -0.9949381 , 0.6515728 , -0.6580445 ,
452+
0.07012729],
453+
[-0.99095035, -0.994938 , 1. , -0.72450536, 0.5790461 ,
454+
0.03047091],
455+
[ 0.8104691 , 0.65157276, -0.72450536, 1. , 0.14243561,
456+
-0.71102554],
457+
[-0.4643693 , -0.6580445 , 0.57904613, 0.1424356 , 0.99999994,
458+
-0.79727215],
459+
[-0.1643624 , 0.07012729, 0.03047091, -0.7110255 , -0.7972722 ,
460+
0.99999994]], dtype=float32)
461+
"""
462+
463+
out = dpnp.cov(x, y, rowvar, dtype=dtype)
464+
if out.ndim == 0:
465+
# scalar covariance
466+
# nan if incorrect value (nan, inf, 0), 1 otherwise
467+
return out / out
468+
469+
d = dpnp.diag(out)
470+
471+
stddev = dpnp.sqrt(d.real)
472+
out /= stddev[:, None]
473+
out /= stddev[None, :]
474+
475+
# Clip real and imaginary parts to [-1, 1]. This does not guarantee
476+
# abs(a[i,j]) <= 1 for complex arrays, but is the best we can do without
477+
# excessive work.
478+
dpnp.clip(out.real, -1, 1, out=out.real)
479+
if dpnp.iscomplexobj(out):
480+
dpnp.clip(out.imag, -1, 1, out=out.imag)
481+
482+
return out
483+
484+
363485
def correlate(x1, x2, mode="valid"):
364486
"""
365487
Cross-correlation of two 1-dimensional sequences.

tests/skipped_tests.tbl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -310,11 +310,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo
310310
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit
311311
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2
312312

313-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef
314-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception
315-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
316-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y
317-
318313
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
319314
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
320315
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]

tests/skipped_tests_gpu.tbl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -320,11 +320,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo
320320
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit
321321
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2
322322

323-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef
324-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception
325-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
326-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y
327-
328323
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
329324
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
330325
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]

tests/test_statistics.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,61 @@ def test_std_error(self):
573573
dpnp.std(ia, ddof="1")
574574

575575

576+
class TestCorrcoef:
577+
@pytest.mark.usefixtures(
578+
"suppress_divide_invalid_numpy_warnings",
579+
"suppress_dof_numpy_warnings",
580+
)
581+
@pytest.mark.parametrize("dtype", get_all_dtypes())
582+
@pytest.mark.parametrize("rowvar", [True, False])
583+
def test_corrcoef(self, dtype, rowvar):
584+
dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype)
585+
np_array = dpnp.asnumpy(dp_array)
586+
587+
expected = numpy.corrcoef(np_array, rowvar=rowvar)
588+
result = dpnp.corrcoef(dp_array, rowvar=rowvar)
589+
590+
assert_dtype_allclose(result, expected)
591+
592+
@pytest.mark.usefixtures(
593+
"suppress_divide_invalid_numpy_warnings",
594+
"suppress_dof_numpy_warnings",
595+
"suppress_mean_empty_slice_numpy_warnings",
596+
)
597+
@pytest.mark.parametrize("shape", [(2, 0), (0, 2)])
598+
def test_corrcoef_empty(self, shape):
599+
dp_array = dpnp.empty(shape, dtype=dpnp.int64)
600+
np_array = dpnp.asnumpy(dp_array)
601+
602+
result = dpnp.corrcoef(dp_array)
603+
expected = numpy.corrcoef(np_array)
604+
assert_dtype_allclose(result, expected)
605+
606+
@pytest.mark.usefixtures("suppress_complex_warning")
607+
@pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True))
608+
@pytest.mark.parametrize("dt_out", get_float_complex_dtypes())
609+
def test_corrcoef_dtype(self, dt_in, dt_out):
610+
dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in)
611+
np_array = dpnp.asnumpy(dp_array)
612+
613+
expected = numpy.corrcoef(np_array, dtype=dt_out)
614+
result = dpnp.corrcoef(dp_array, dtype=dt_out)
615+
assert expected.dtype == result.dtype
616+
assert_allclose(result, expected, rtol=1e-6)
617+
618+
@pytest.mark.usefixtures(
619+
"suppress_divide_invalid_numpy_warnings",
620+
"suppress_dof_numpy_warnings",
621+
)
622+
def test_corrcoef_scalar(self):
623+
dp_array = dpnp.array(5)
624+
np_array = dpnp.asnumpy(dp_array)
625+
626+
result = dpnp.corrcoef(dp_array)
627+
expected = numpy.corrcoef(np_array)
628+
assert_dtype_allclose(result, expected)
629+
630+
576631
@pytest.mark.usefixtures("allow_fall_back_on_numpy")
577632
class TestBincount:
578633
@pytest.mark.parametrize(

tests/test_sycl_queue.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,7 @@ def test_meshgrid(device):
442442
pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
443443
pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]),
444444
pytest.param("copy", [1.0, 2.0, 3.0]),
445+
pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
445446
pytest.param(
446447
"cos", [-dpnp.pi / 2, -dpnp.pi / 4, 0.0, dpnp.pi / 4, dpnp.pi / 2]
447448
),
@@ -693,6 +694,11 @@ def test_reduce_hypot(device):
693694
pytest.param("append", [1, 2, 3], [4, 5, 6]),
694695
pytest.param("arctan2", [-1, +1, +1, -1], [-1, -1, +1, +1]),
695696
pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
697+
pytest.param(
698+
"corrcoef",
699+
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
700+
[[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
701+
),
696702
pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]),
697703
pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]),
698704
pytest.param(

tests/test_usm_type.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ def test_norm(usm_type, ord, axis):
576576
pytest.param("cbrt", [1, 8, 27]),
577577
pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
578578
pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]),
579+
pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
579580
pytest.param(
580581
"cos", [-dp.pi / 2, -dp.pi / 4, 0.0, dp.pi / 4, dp.pi / 2]
581582
),
@@ -685,6 +686,11 @@ def test_1in_1out(func, data, usm_type):
685686
pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
686687
pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]),
687688
pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]),
689+
pytest.param(
690+
"corrcoef",
691+
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
692+
[[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
693+
),
688694
# dpnp.dot has 3 different implementations based on input arrays dtype
689695
# checking all of them
690696
pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]),

tests/third_party/cupy/statistics_tests/test_correlation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,26 @@
1212

1313
class TestCorrcoef(unittest.TestCase):
1414
@testing.for_all_dtypes()
15-
@testing.numpy_cupy_allclose()
15+
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
1616
def test_corrcoef(self, xp, dtype):
1717
a = testing.shaped_arange((2, 3), xp, dtype)
1818
return xp.corrcoef(a)
1919

2020
@testing.for_all_dtypes()
21-
@testing.numpy_cupy_allclose()
21+
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
2222
def test_corrcoef_diag_exception(self, xp, dtype):
2323
a = testing.shaped_arange((1, 3), xp, dtype)
2424
return xp.corrcoef(a)
2525

2626
@testing.for_all_dtypes()
27-
@testing.numpy_cupy_allclose()
27+
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
2828
def test_corrcoef_y(self, xp, dtype):
2929
a = testing.shaped_arange((2, 3), xp, dtype)
3030
y = testing.shaped_arange((2, 3), xp, dtype)
3131
return xp.corrcoef(a, y=y)
3232

3333
@testing.for_all_dtypes()
34-
@testing.numpy_cupy_allclose()
34+
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
3535
def test_corrcoef_rowvar(self, xp, dtype):
3636
a = testing.shaped_arange((2, 3), xp, dtype)
3737
y = testing.shaped_arange((2, 3), xp, dtype)

0 commit comments

Comments
 (0)