Skip to content

Commit 93f6042

Browse files
committed
Restrict legal argument combinations; no nesting
1 parent 62bdf95 commit 93f6042

File tree

4 files changed

+76
-44
lines changed

4 files changed

+76
-44
lines changed

doc/source/text.rst

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -305,18 +305,20 @@ The same alignment can be used when ``others`` is a ``DataFrame``:
305305
Concatenating a Series and many objects into a Series
306306
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
307307

308-
All list-likes (including iterators, ``dict``-views, etc.) can be arbitrarily combined in a list-like container:
308+
All one-dimensional list-likes can be arbitrarily combined in a list-like container (including iterators, ``dict``-views, etc.):
309309

310310
.. ipython:: python
311311
312-
s.str.cat([u, t.values, ['A', 'B', 'C', 'D'], d.values, f], na_rep='-')
312+
s
313+
u
314+
s.str.cat([u, pd.Index(u.values), ['A', 'B', 'C', 'D']], na_rep='-')
313315
314-
All elements must match in length to the calling ``Series``, except those having an index if ``join`` is not None:
316+
All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:
315317

316318
.. ipython:: python
317319
318-
s.str.cat([u, v, ['A', 'B', 'C', 'D'], d.values, f.loc[[1]]],
319-
join='outer', na_rep='-')
320+
v
321+
s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
320322
321323
If using ``join='right'`` on a list of ``others`` that contains different indexes,
322324
the union of these indexes will be used as the basis for the final concatenation:

doc/source/whatsnew/v0.23.0.txt

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,9 @@ The :func:`DataFrame.assign` now accepts dependent keyword arguments for python
314314
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
315315

316316
Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`).
317-
The method has now gained a keyword ``join`` to control the manner of alignment. In v.0.23 it will default to None (meaning no alignment), but this default will change
318-
to ``'left'`` in a future version of pandas.
317+
The method has now gained a keyword ``join`` to control the manner of alignment; see the examples below and :ref:`here <text.concatenate>`.
318+
319+
In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas.
319320

320321
.. ipython:: python
321322

@@ -324,12 +325,9 @@ to ``'left'`` in a future version of pandas.
324325
s.str.cat(t)
325326
s.str.cat(t, join='left', na_rep='-')
326327

327-
In particular, ``others`` does not need to be of the same length as the calling ``Series`` (if both have an index and ``join is not None``).
328-
For more examples, see :ref:`here <text.concatenate>`.
329-
330-
For categorical data, it is now possible to call :meth:`Series.str.cat` for ``CategoricalIndex`` as well (previously raised a ``ValueError``).
331-
Finally, if ``others is not None``, the resulting ``Series``/``Index`` will now remain categorical if the calling
332-
``Series``/``Index`` is categorical.
328+
Furthermore:
329+
- :meth:`Series.str.cat` now works for ``CategoricalIndex`` as well (previously raised a ``ValueError``; see :issue:`20842`)
330+
- If concatenating with something (i.e. ``others is not None``), the resulting ``Series``/``Index`` will now remain categorical if the calling ``Series``/``Index`` is categorical (see :issue:`20843`)
333331

334332
.. _whatsnew_0230.enhancements.astype_category:
335333

pandas/core/strings.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1938,19 +1938,20 @@ def cons_row(x):
19381938
def _get_series_list(self, others, ignore_index=False):
19391939
"""
19401940
Auxiliary function for :meth:`str.cat`. Turn potentially mixed input
1941-
into list of Series (elements without an index must match the length of
1942-
the calling Series/Index).
1941+
into a list of Series (elements without an index must match the length
1942+
of the calling Series/Index).
19431943
19441944
Parameters
19451945
----------
1946-
input : Series, DataFrame, np.ndarrary, list-like or list-like of those
1946+
input : Series, DataFrame, np.ndarray, list-like or list-like of
1947+
objects that are either Series, np.ndarray (1-dim) or list-like
19471948
ignore_index : boolean, default False
19481949
Determines whether to forcefully align with index of the caller
19491950
19501951
Returns
19511952
-------
1952-
tuple : first element: input transformed into list of Series
1953-
second element: Boolean whether FutureWarning should be raised
1953+
tuple : (input transformed into list of Series,
1954+
Boolean whether FutureWarning should be raised)
19541955
"""
19551956

19561957
# once str.cat defaults to alignment, this function can be simplified;
@@ -1961,6 +1962,10 @@ def _get_series_list(self, others, ignore_index=False):
19611962
# self._orig is either Series or Index
19621963
idx = self._orig if isinstance(self._orig, Index) else self._orig.index
19631964

1965+
err_msg = ('others must be Series, Index, DataFrame, np.ndarrary or '
1966+
'list-like (either containing only strings or containing '
1967+
'only objects of type Series/Index/list-like/np.ndarray')
1968+
19641969
if isinstance(others, Series):
19651970
fut_warn = not others.index.equals(idx)
19661971
los = [Series(others.values, index=idx)
@@ -1988,15 +1993,32 @@ def _get_series_list(self, others, ignore_index=False):
19881993
los = []
19891994
fut_warn = False
19901995
while others:
1991-
tmp = self._get_series_list(others.pop(0),
1992-
ignore_index=ignore_index)
1996+
nxt = others.pop(0)
1997+
# safety for iterators etc.; exclude indexed objects
1998+
if (is_list_like(nxt) and
1999+
not isinstance(nxt, (DataFrame, Series, Index))):
2000+
nxt = list(nxt)
2001+
2002+
# nested list-likes are forbidden - content must be strings
2003+
is_legal = (is_list_like(nxt) and
2004+
all(isinstance(x, compat.string_types)
2005+
for x in nxt))
2006+
# DataFrame is false positive of is_legal
2007+
# because "x in df" returns column names
2008+
if isinstance(nxt, DataFrame) or not is_legal:
2009+
raise TypeError(err_msg)
2010+
2011+
tmp = self._get_series_list(nxt, ignore_index=ignore_index)
19932012
los = los + tmp[0]
19942013
fut_warn = fut_warn or tmp[1]
19952014
return (los, fut_warn)
2015+
# test if there is a mix of list-like and string/NaN/None
2016+
elif (any(is_list_like(x) for x in others)
2017+
and any(not is_list_like(x) for x in others)):
2018+
raise TypeError(err_msg)
19962019
else:
19972020
return ([Series(others, index=idx)], False)
1998-
raise ValueError('others must be Series, Index, DataFrame, '
1999-
'np.ndarrary or list-like')
2021+
raise TypeError(err_msg)
20002022

20012023
def cat(self, others=None, sep=None, na_rep=None, join=None):
20022024
"""
@@ -2015,9 +2037,9 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
20152037
calling Series/Index, with the exception of indexed objects (i.e.
20162038
Series/Index/DataFrame) if `join` is not None.
20172039
2018-
If others is a list-like that contains an arbitrary combination of
2019-
the above, then all elements will be unpacked and must satisfy the
2020-
above criteria individually.
2040+
If others is a list-like that contains a combination of Series,
2041+
np.ndarray (1-dim) or list-like, then all elements will be unpacked
2042+
and must satisfy the above criteria individually.
20212043
20222044
If others is None, the method returns the concatenation of all
20232045
strings in the calling Series/Index.
@@ -2158,7 +2180,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21582180
# turn anything in "others" into lists of Series
21592181
tmp = self._get_series_list(others, ignore_index=(join is None))
21602182
others, fut_warn = tmp
2161-
except ValueError:
2183+
except ValueError: # let TypeError raised by _get_series_list pass
21622184
if join is None:
21632185
# legacy warning
21642186
raise ValueError('All arrays must be same length')

pandas/tests/test_strings.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,29 @@ def test_str_cat_mixed_inputs(self, series_or_index):
349349
with tm.assert_raises_regex(ValueError, rgx):
350350
s.str.cat([z, list(s)])
351351

352+
# errors for incorrect arguments in list-like
353+
rgx = 'others must be Series, Index, DataFrame,.*'
354+
355+
# mix of string and Series
356+
with tm.assert_raises_regex(TypeError, rgx):
357+
s.str.cat([s, 's'])
358+
359+
# DataFrame in list
360+
with tm.assert_raises_regex(TypeError, rgx):
361+
s.str.cat([s, d])
362+
363+
# 2-dim ndarray in list
364+
with tm.assert_raises_regex(TypeError, rgx):
365+
s.str.cat([s, d.values])
366+
367+
# nested lists
368+
with tm.assert_raises_regex(TypeError, rgx):
369+
s.str.cat([s, [s, d]])
370+
371+
# forbidden input type, e.g. int
372+
with tm.assert_raises_regex(TypeError, rgx):
373+
s.str.cat(1)
374+
352375
@pytest.mark.parametrize('series_or_index, join', [
353376
('series', 'left'), ('series', 'outer'),
354377
('series', 'inner'), ('series', 'right'),
@@ -410,28 +433,15 @@ def test_str_cat_align_mixed_inputs(self, join):
410433
def test_str_cat_special_cases(self):
411434
s = Series(['a', 'b', 'c', 'd'])
412435
t = Series(['d', 'a', 'e', 'b'], index=[3, 0, 4, 1])
413-
d = concat([t, t], axis=1)
414-
415-
# lists of elements with different types - unaligned
416-
mix = [t, t.values, ['A', 'B', 'C', 'D'], d, d.values]
417-
exp = Series(['addAdddd', 'baaBaaaa', 'ceeCeeee', 'dbbDbbbb'])
418-
with tm.assert_produces_warning(expected_warning=FutureWarning):
419-
# FutureWarning to switch to alignment by default
420-
tm.assert_series_equal(s.str.cat(mix, join=None), exp)
421-
422-
# lists of elements with different types - aligned with na_rep
423-
exp = Series(['aadAaadd', 'bbaBbbaa', 'c-eC--ee', 'ddbDddbb'])
424-
tm.assert_series_equal(s.str.cat(mix, join='left', na_rep='-'), exp)
425436

426437
# iterator of elements with different types
427-
exp = Series(['aadAaadd', 'bbaBbbaa', 'c-eC--ee',
428-
'ddbDddbb', '-e--ee--'])
429-
tm.assert_series_equal(s.str.cat(iter(mix), join='outer', na_rep='-'),
430-
exp)
438+
exp = Series(['aaA', 'bbB', 'c-C', 'ddD', '-e-'])
439+
tm.assert_series_equal(s.str.cat(iter([t, ['A', 'B', 'C', 'D']]),
440+
join='outer', na_rep='-'), exp)
431441

432442
# right-align with different indexes in others
433-
exp = Series(['aa--', 'd-dd'], index=[0, 3])
434-
tm.assert_series_equal(s.str.cat([t.loc[[0]], d.loc[[3]]],
443+
exp = Series(['aa-', 'd-d'], index=[0, 3])
444+
tm.assert_series_equal(s.str.cat([t.loc[[0]], t.loc[[3]]],
435445
join='right', na_rep='-'), exp)
436446

437447
def test_cat_on_filtered_index(self):

0 commit comments

Comments
 (0)