Skip to content

Commit ac45f88

Browse files
authored
Adjust tests in base folder for arrow string option (#56124)
1 parent 2ed994f commit ac45f88

File tree

5 files changed

+42
-13
lines changed

5 files changed

+42
-13
lines changed

pandas/tests/base/test_constructors.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,9 @@ class TestConstruction:
138138
"object-string",
139139
],
140140
)
141-
def test_constructor_datetime_outofbound(self, a, constructor):
141+
def test_constructor_datetime_outofbound(
142+
self, a, constructor, request, using_infer_string
143+
):
142144
# GH-26853 (+ bug GH-26206 out of bound non-ns unit)
143145

144146
# No dtype specified (dtype inference)
@@ -150,7 +152,10 @@ def test_constructor_datetime_outofbound(self, a, constructor):
150152
assert result.dtype == "M8[s]"
151153
else:
152154
result = constructor(a)
153-
assert result.dtype == "object"
155+
if using_infer_string and "object-string" in request.node.callspec.id:
156+
assert result.dtype == "string"
157+
else:
158+
assert result.dtype == "object"
154159
tm.assert_numpy_array_equal(result.to_numpy(), a)
155160

156161
# Explicit dtype specified

pandas/tests/base/test_conversion.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
SparseArray,
2121
TimedeltaArray,
2222
)
23+
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
2324

2425

2526
class TestToIterable:
@@ -215,7 +216,9 @@ def test_iter_box_period(self):
215216
),
216217
],
217218
)
218-
def test_values_consistent(arr, expected_type, dtype):
219+
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
220+
if using_infer_string and dtype == "object":
221+
expected_type = ArrowStringArrayNumpySemantics
219222
l_values = Series(arr)._values
220223
r_values = pd.Index(arr)._values
221224
assert type(l_values) is expected_type
@@ -358,17 +361,23 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
358361
@pytest.mark.parametrize(
359362
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
360363
)
361-
def test_to_numpy_copy(arr, as_series):
364+
def test_to_numpy_copy(arr, as_series, using_infer_string):
362365
obj = pd.Index(arr, copy=False)
363366
if as_series:
364367
obj = Series(obj.values, copy=False)
365368

366369
# no copy by default
367370
result = obj.to_numpy()
368-
assert np.shares_memory(arr, result) is True
371+
if using_infer_string and arr.dtype == object:
372+
assert np.shares_memory(arr, result) is False
373+
else:
374+
assert np.shares_memory(arr, result) is True
369375

370376
result = obj.to_numpy(copy=False)
371-
assert np.shares_memory(arr, result) is True
377+
if using_infer_string and arr.dtype == object:
378+
assert np.shares_memory(arr, result) is False
379+
else:
380+
assert np.shares_memory(arr, result) is True
372381

373382
# copy=True
374383
result = obj.to_numpy(copy=True)

pandas/tests/base/test_misc.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._config import using_pyarrow_string_dtype
7+
68
from pandas.compat import PYPY
79

810
from pandas.core.dtypes.common import (
@@ -80,7 +82,10 @@ def test_ndarray_compat_properties(index_or_series_obj):
8082
assert Series([1]).item() == 1
8183

8284

83-
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
85+
@pytest.mark.skipif(
86+
PYPY or using_pyarrow_string_dtype(),
87+
reason="not relevant for PyPy doesn't work properly for arrow strings",
88+
)
8489
def test_memory_usage(index_or_series_memory_obj):
8590
obj = index_or_series_memory_obj
8691
# Clear index caches so that objects with len(obj) == 0 report 0 memory usage
@@ -175,7 +180,9 @@ def test_access_by_position(index_flat):
175180
assert index[-1] == index[size - 1]
176181

177182
msg = f"index {size} is out of bounds for axis 0 with size {size}"
178-
if is_dtype_equal(index.dtype, "string[pyarrow]"):
183+
if is_dtype_equal(index.dtype, "string[pyarrow]") or is_dtype_equal(
184+
index.dtype, "string[pyarrow_numpy]"
185+
):
179186
msg = "index out of bounds"
180187
with pytest.raises(IndexError, match=msg):
181188
index[size]

pandas/tests/base/test_unique.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_pyarrow_string_dtype
5+
46
import pandas as pd
57
import pandas._testing as tm
68
from pandas.tests.base.common import allow_na_ops
@@ -98,6 +100,7 @@ def test_nunique_null(null_obj, index_or_series_obj):
98100

99101

100102
@pytest.mark.single_cpu
103+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
101104
def test_unique_bad_unicode(index_or_series):
102105
# regression test for #34550
103106
uval = "\ud83d" # lone high surrogate (first half of a smiley emoji's UTF-16 pair)

pandas/tests/base/test_value_counts.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Series,
1515
Timedelta,
1616
TimedeltaIndex,
17+
array,
1718
)
1819
import pandas._testing as tm
1920
from pandas.tests.base.common import allow_na_ops
@@ -113,7 +114,7 @@ def test_value_counts_null(null_obj, index_or_series_obj):
113114
tm.assert_series_equal(result, expected)
114115

115116

116-
def test_value_counts_inferred(index_or_series):
117+
def test_value_counts_inferred(index_or_series, using_infer_string):
117118
klass = index_or_series
118119
s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"]
119120
s = klass(s_values)
@@ -125,7 +126,9 @@ def test_value_counts_inferred(index_or_series):
125126
tm.assert_index_equal(s.unique(), exp)
126127
else:
127128
exp = np.unique(np.array(s_values, dtype=np.object_))
128-
tm.assert_numpy_array_equal(s.unique(), exp)
129+
if using_infer_string:
130+
exp = array(exp)
131+
tm.assert_equal(s.unique(), exp)
129132

130133
assert s.nunique() == 4
131134
# don't sort, have to sort after the fact as not sorting is
@@ -147,7 +150,7 @@ def test_value_counts_inferred(index_or_series):
147150
tm.assert_series_equal(hist, expected)
148151

149152

150-
def test_value_counts_bins(index_or_series):
153+
def test_value_counts_bins(index_or_series, using_infer_string):
151154
klass = index_or_series
152155
s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"]
153156
s = klass(s_values)
@@ -201,7 +204,9 @@ def test_value_counts_bins(index_or_series):
201204
tm.assert_index_equal(s.unique(), exp)
202205
else:
203206
exp = np.array(["a", "b", np.nan, "d"], dtype=object)
204-
tm.assert_numpy_array_equal(s.unique(), exp)
207+
if using_infer_string:
208+
exp = array(exp)
209+
tm.assert_equal(s.unique(), exp)
205210
assert s.nunique() == 3
206211

207212
s = klass({}) if klass is dict else klass({}, dtype=object)
@@ -246,7 +251,7 @@ def test_value_counts_datetime64(index_or_series, unit):
246251
expected_s = Series([3, 2, 1], index=idx, name="count")
247252
tm.assert_series_equal(s.value_counts(), expected_s)
248253

249-
expected = pd.array(
254+
expected = array(
250255
np.array(
251256
["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
252257
dtype=f"datetime64[{unit}]",

0 commit comments

Comments
 (0)