Skip to content

Commit 62bdf95

Browse files
committed
Emit FutureWarning only for different indexes
1 parent 89292b9 commit 62bdf95

File tree

3 files changed

+91
-26
lines changed

3 files changed

+91
-26
lines changed

doc/source/text.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ the ``join``-keyword, which controls the manner of alignment.
280280
.. warning::
281281

282282
If the ``join`` keyword is not passed, the method :meth:`~Series.str.cat` will currently fall back to the behavior before version 0.23.0 (i.e. no alignment),
283-
but a ``FutureWarning`` will be raised, since this default will change to ``join='left'`` in a future version.
283+
but a ``FutureWarning`` will be raised if any of the involved indexes differ, since this default will change to ``join='left'`` in a future version.
284284

285285
The usual options are available for ``join`` (one of ``'left', 'outer', 'inner', 'right'``).
286286
In particular, alignment also means that the different lengths do not need to coincide anymore.
@@ -305,7 +305,7 @@ The same alignment can be used when ``others`` is a ``DataFrame``:
305305
Concatenating a Series and many objects into a Series
306306
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
307307

308-
All list-likes (including iterators, ``dict``-views, etc.) can be arbitrarily combined in a list-like container:
308+
All list-likes (including iterators, ``dict``-views, etc.) can be arbitrarily combined in a list-like container:
309309

310310
.. ipython:: python
311311

pandas/core/strings.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,34 +1962,37 @@ def _get_series_list(self, others, ignore_index=False):
19621962
idx = self._orig if isinstance(self._orig, Index) else self._orig.index
19631963

19641964
if isinstance(others, Series):
1965+
fut_warn = not others.index.equals(idx)
19651966
los = [Series(others.values, index=idx)
1966-
if ignore_index else others]
1967-
return (los, True)
1967+
if ignore_index and fut_warn else others]
1968+
return (los, fut_warn)
19681969
elif isinstance(others, Index):
1970+
fut_warn = not others.equals(idx)
19691971
los = [Series(others.values,
19701972
index=(idx if ignore_index else others))]
1971-
return (los, True)
1973+
return (los, fut_warn)
19721974
elif isinstance(others, DataFrame):
1973-
if ignore_index:
1975+
fut_warn = not others.index.equals(idx)
1976+
if ignore_index and fut_warn:
19741977
# without copy, this could change (the corresponding list
19751978
# element of) "others" that was passed to str.cat
19761979
others = others.copy()
19771980
others.index = idx
1978-
return ([others[x] for x in others], True)
1981+
return ([others[x] for x in others], fut_warn)
19791982
elif isinstance(others, np.ndarray) and others.ndim == 2:
19801983
others = DataFrame(others, index=idx)
19811984
return ([others[x] for x in others], False)
19821985
elif is_list_like(others):
19831986
others = list(others) # ensure iterators do not get read twice etc
19841987
if all(is_list_like(x) for x in others):
19851988
los = []
1986-
fuwa = False
1989+
fut_warn = False
19871990
while others:
19881991
tmp = self._get_series_list(others.pop(0),
19891992
ignore_index=ignore_index)
19901993
los = los + tmp[0]
1991-
fuwa = fuwa or tmp[1]
1992-
return (los, fuwa)
1994+
fut_warn = fut_warn or tmp[1]
1995+
return (los, fut_warn)
19931996
else:
19941997
return ([Series(others, index=idx)], False)
19951998
raise ValueError('others must be Series, Index, DataFrame, '
@@ -2145,7 +2148,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21452148
else: # Series
21462149
data = self._orig
21472150

2148-
# concatenate Series into itself if no "others"
2151+
# concatenate Series/Index with itself if no "others"
21492152
if others is None:
21502153
result = str_cat(data, others=others, sep=sep, na_rep=na_rep)
21512154
return self._wrap_result(result,

pandas/tests/test_strings.py

Lines changed: 77 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919
import pandas.core.strings as strings
2020

2121

22-
def assert_series_or_index_equal(left, right):
22+
def assert_series_or_index_equal(left, right, expect_warn=False):
2323
if isinstance(left, Series):
2424
assert_series_equal(left, right)
25-
else:
25+
else: # Index
2626
assert_index_equal(left, right)
2727

2828

@@ -158,20 +158,32 @@ def test_str_cat(self, series_or_index):
158158
exp = Index(['aa', 'a-', 'bb', 'bd', 'cfoo', '--'])
159159
if series_or_index == 'series':
160160
exp = Series(exp)
161+
# s.index / s is different from t (as Index) -> warning
161162
with tm.assert_produces_warning(expected_warning=FutureWarning):
162163
# FutureWarning to switch to alignment by default
163164
assert_series_or_index_equal(s.str.cat(t, na_rep='-'), exp)
164165

165166
# Series/Index with Series
166167
t = Series(t)
168+
# s as Series has same index as t -> no warning
169+
# s as Index is different from t.index -> warning
170+
if series_or_index == 'series':
171+
assert_series_equal(s.str.cat(t, na_rep='-'), exp)
172+
else:
173+
with tm.assert_produces_warning(expected_warning=FutureWarning):
174+
# FutureWarning to switch to alignment by default
175+
assert_series_or_index_equal(s.str.cat(t, na_rep='-'), exp)
176+
177+
# Series/Index with Series: warning if different indexes
178+
t.index = t.index + 1
167179
with tm.assert_produces_warning(expected_warning=FutureWarning):
168180
# FutureWarning to switch to alignment by default
169181
assert_series_or_index_equal(s.str.cat(t, na_rep='-'), exp)
170182

171-
# Series/Index with array (no warning necessary)
183+
# Series/Index with array
172184
assert_series_or_index_equal(s.str.cat(t.values, na_rep='-'), exp)
173185

174-
# Series/Index with list (no warning necessary)
186+
# Series/Index with list
175187
assert_series_or_index_equal(s.str.cat(list(t), na_rep='-'), exp)
176188

177189
# errors for incorrect lengths
@@ -219,15 +231,29 @@ def test_str_cat_categorical(self, series_or_index,
219231
exp = Index(['ab', 'aa', 'bb', 'ac'], dtype=dtype_caller)
220232
if series_or_index == 'series':
221233
exp = Series(exp)
234+
222235
# Series/Index with Index
236+
# s.index / s is different from t (as Index) -> warning
223237
with tm.assert_produces_warning(expected_warning=FutureWarning):
224238
# FutureWarning to switch to alignment by default
225239
assert_series_or_index_equal(s.str.cat(t), exp)
226240

227241
# Series/Index with Series
242+
t = Series(t)
243+
# s as Series has same index as t -> no warning
244+
# s as Index is different from t.index -> warning
245+
if series_or_index == 'series':
246+
assert_series_equal(s.str.cat(t), exp)
247+
else:
248+
with tm.assert_produces_warning(expected_warning=FutureWarning):
249+
# FutureWarning to switch to alignment by default
250+
assert_series_or_index_equal(s.str.cat(t), exp)
251+
252+
# Series/Index with Series: warning if different indexes
253+
t.index = t.index + 1
228254
with tm.assert_produces_warning(expected_warning=FutureWarning):
229255
# FutureWarning to switch to alignment by default
230-
assert_series_or_index_equal(s.str.cat(Series(t)), exp)
256+
assert_series_or_index_equal(s.str.cat(t, na_rep='-'), exp)
231257

232258
@pytest.mark.parametrize('series_or_index', ['series', 'index'])
233259
def test_str_cat_mixed_inputs(self, series_or_index):
@@ -240,28 +266,62 @@ def test_str_cat_mixed_inputs(self, series_or_index):
240266
exp = Index(['aAa', 'bBb', 'cCc', 'dDd'])
241267
if series_or_index == 'series':
242268
exp = Series(exp)
269+
243270
# Series/Index with DataFrame
271+
# s as Series has same index as d -> no warning
272+
# s as Index is different from d.index -> warning
273+
if series_or_index == 'series':
274+
assert_series_equal(s.str.cat(d), exp)
275+
else:
276+
with tm.assert_produces_warning(expected_warning=FutureWarning):
277+
# FutureWarning to switch to alignment by default
278+
assert_series_or_index_equal(s.str.cat(d), exp)
279+
280+
# Series/Index with DataFrame: warning if different indexes
281+
d.index = d.index + 1
244282
with tm.assert_produces_warning(expected_warning=FutureWarning):
245283
# FutureWarning to switch to alignment by default
246284
assert_series_or_index_equal(s.str.cat(d), exp)
247285

248-
# Series/Index with two-dimensional ndarray (no warning necessary)
286+
# Series/Index with two-dimensional ndarray
249287
assert_series_or_index_equal(s.str.cat(d.values), exp)
250288

251289
# Series/Index with list of Series
290+
# s as Series has same index as t, s -> no warning
291+
# s as Index is different from t.index -> warning
292+
if series_or_index == 'series':
293+
assert_series_equal(s.str.cat([t, s]), exp)
294+
else:
295+
with tm.assert_produces_warning(expected_warning=FutureWarning):
296+
# FutureWarning to switch to alignment by default
297+
assert_series_or_index_equal(s.str.cat([t, s]), exp)
298+
299+
# Series/Index with list of Series: warning if different indexes
300+
tt = t.copy()
301+
tt.index = tt.index + 1
252302
with tm.assert_produces_warning(expected_warning=FutureWarning):
253303
# FutureWarning to switch to alignment by default
254-
assert_series_or_index_equal(s.str.cat([t, s]), exp)
304+
assert_series_or_index_equal(s.str.cat([tt, s]), exp)
255305

256-
# Series/Index with list of list-likes (no warning necessary)
306+
# Series/Index with list of list-likes
257307
assert_series_or_index_equal(s.str.cat([t.values, list(s)]), exp)
258308

259309
# Series/Index with mixed list of Series/list-like
310+
# s as Series has same index as t -> no warning
311+
# s as Index is different from t.index -> warning
312+
if series_or_index == 'series':
313+
assert_series_equal(s.str.cat([t, s.values]), exp)
314+
else:
315+
with tm.assert_produces_warning(expected_warning=FutureWarning):
316+
# FutureWarning to switch to alignment by default
317+
assert_series_or_index_equal(s.str.cat([t, s.values]), exp)
318+
319+
# Series/Index with mixed list: warning if different indexes
260320
with tm.assert_produces_warning(expected_warning=FutureWarning):
261321
# FutureWarning to switch to alignment by default
262-
assert_series_or_index_equal(s.str.cat([t.values, s]), exp)
322+
assert_series_or_index_equal(s.str.cat([tt, s.values]), exp)
263323

264-
# Series/Index with iterator of list-likes (no warning necessary)
324+
# Series/Index with iterator of list-likes
265325
assert_series_or_index_equal(s.str.cat(iter([t.values, list(s)])), exp)
266326

267327
# errors for incorrect lengths
@@ -300,13 +360,13 @@ def test_str_cat_align_indexed(self, series_or_index, join):
300360
s = Series(['a', 'b', 'c', 'd'], index=['a', 'b', 'c', 'd'])
301361
t = Series(['D', 'A', 'E', 'B'], index=['d', 'a', 'e', 'b'])
302362
sa, ta = s.align(t, join=join)
363+
# result after manual alignment of inputs
364+
exp = sa.str.cat(ta, na_rep='-')
365+
303366
if series_or_index == 'index':
304367
s = Index(s)
305368
sa = Index(sa)
306-
307-
with tm.assert_produces_warning(expected_warning=FutureWarning):
308-
# result of mamnual alignmnent of inputs
309-
exp = sa.str.cat(ta, na_rep='-')
369+
exp = Index(exp)
310370

311371
assert_series_or_index_equal(s.str.cat(t, join=join, na_rep='-'), exp)
312372

@@ -329,6 +389,7 @@ def test_str_cat_align_mixed_inputs(self, join):
329389
# mixed list of indexed/unindexed
330390
u = ['A', 'B', 'C', 'D']
331391
exp_outer = Series(['aaA', 'bbB', 'c-C', 'ddD', '-e-'])
392+
# u will be forced to have the index of s -> use s here as placeholder
332393
e = concat([t, s], axis=1, join=(join if join == 'inner' else 'outer'))
333394
sa, ea = s.align(e, join=join)
334395
exp = exp_outer.loc[ea.index]
@@ -355,6 +416,7 @@ def test_str_cat_special_cases(self):
355416
mix = [t, t.values, ['A', 'B', 'C', 'D'], d, d.values]
356417
exp = Series(['addAdddd', 'baaBaaaa', 'ceeCeeee', 'dbbDbbbb'])
357418
with tm.assert_produces_warning(expected_warning=FutureWarning):
419+
# FutureWarning to switch to alignment by default
358420
tm.assert_series_equal(s.str.cat(mix, join=None), exp)
359421

360422
# lists of elements with different types - aligned with na_rep
@@ -367,7 +429,7 @@ def test_str_cat_special_cases(self):
367429
tm.assert_series_equal(s.str.cat(iter(mix), join='outer', na_rep='-'),
368430
exp)
369431

370-
# right-align with different indexes in other
432+
# right-align with different indexes in others
371433
exp = Series(['aa--', 'd-dd'], index=[0, 3])
372434
tm.assert_series_equal(s.str.cat([t.loc[[0]], d.loc[[3]]],
373435
join='right', na_rep='-'), exp)

0 commit comments

Comments
 (0)