Skip to content

Commit 89292b9

Browse files
committed
Incorporate review feedback
1 parent 7a8ac7c commit 89292b9

File tree

4 files changed

+59
-71
lines changed

4 files changed

+59
-71
lines changed

doc/source/text.rst

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ The parameter ``others`` can also be two-dimensional. In this case, the number o
257257
.. ipython:: python
258258
259259
d = pd.concat([t, s], axis=1)
260+
s
260261
d
261262
s.str.cat(d, na_rep='-')
262263
@@ -271,6 +272,8 @@ the ``join``-keyword, which controls the manner of alignment.
271272
.. ipython:: python
272273
273274
u = pd.Series(['b', 'd', 'a', 'c'], index=[1, 3, 0, 2])
275+
s
276+
u
274277
s.str.cat(u)
275278
s.str.cat(u, join='left')
276279
@@ -285,6 +288,8 @@ In particular, alignment also means that the different lengths do not need to co
285288
.. ipython:: python
286289
287290
v = pd.Series(['z', 'a', 'b', 'd', 'e'], index=[-1, 0, 1, 3, 4])
291+
s
292+
v
288293
s.str.cat(v, join='left', na_rep='-')
289294
s.str.cat(v, join='outer', na_rep='-')
290295
@@ -293,13 +298,14 @@ The same alignment can be used when ``others`` is a ``DataFrame``:
293298
.. ipython:: python
294299
295300
f = d.loc[[3, 2, 1, 0], :]
301+
s
296302
f
297303
s.str.cat(f, join='left', na_rep='-')
298304
299305
Concatenating a Series and many objects into a Series
300306
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
301307

302-
All list-likes (as well as ``DataFrame`` and two-dimensional ``ndarray``) can be arbitrarily combined in a list-like container:
308+
All list-likes (including iterators, ``dict``-views, etc.) can be arbitrarily combined in a list-like container:
303309

304310
.. ipython:: python
305311
@@ -317,15 +323,10 @@ the union of these indexes will be used as the basis for the final concatenation
317323

318324
.. ipython:: python
319325
326+
u.loc[[3]]
327+
v.loc[[-1, 0]]
320328
s.str.cat([u.loc[[3]], v.loc[[-1, 0]]], join='right', na_rep='-')
321329
322-
Finally, the surrounding container can also be an :obj:`Iterable` other than a ``list`` (e.g. an iterator, or a ``dict``-view, etc.):
323-
324-
.. ipython:: python
325-
326-
from collections import OrderedDict
327-
s.str.cat(d.to_dict('series', into=OrderedDict).values(), na_rep='-')
328-
329330
Indexing with ``.str``
330331
----------------------
331332

doc/source/whatsnew/v0.23.0.txt

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -326,17 +326,6 @@ to ``'left'`` in a future version of pandas.
326326

327327
In particular, ``others`` does not need to be of the same length as the calling ``Series`` (if both have an index and ``join is not None``).
328328
For more examples, see :ref:`here <text.concatenate>`.
329-
330-
Additionally, ``str.cat`` now allows ``others`` to be a ``DataFrame`` or two-dimensional ``np.ndarray``.
331-
332-
.. ipython:: python
333-
334-
u = pd.Series(['b', 'd', 'a', 'c'], index=[1, 3, 0, 2])
335-
d = pd.concat([s, u], axis=1)
336-
t.str.cat(d.values)
337-
s.str.cat(d, join='left', na_rep='-')
338-
339-
Furthermore, any combination of "concatenateable" arguments can be passed in a list-like container (e.g. an iterator).
340329

341330
For categorical data, it is now possible to call :meth:`Series.str.cat` for ``CategoricalIndex`` as well (previously raised a ``ValueError``).
342331
Finally, if ``others is not None``, the resulting ``Series``/``Index`` will now remain categorical if the calling

pandas/core/strings.py

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1800,6 +1800,7 @@ class StringMethods(NoNewAttributesMixin):
18001800
def __init__(self, data):
18011801
self._validate(data)
18021802
self._is_categorical = is_categorical_dtype(data)
1803+
18031804
# .values.categories works for both Series/Index
18041805
self._data = data.values.categories if self._is_categorical else data
18051806
# save orig to blow up categoricals to the right type
@@ -1934,7 +1935,7 @@ def cons_row(x):
19341935
cons = self._orig._constructor
19351936
return cons(result, name=name, index=index)
19361937

1937-
def _str_cat_los(self, input, ignore_index=False):
1938+
def _get_series_list(self, others, ignore_index=False):
19381939
"""
19391940
Auxiliary function for :meth:`str.cat`. Turn potentially mixed input
19401941
into list of Series (elements without an index must match the length of
@@ -1943,7 +1944,7 @@ def _str_cat_los(self, input, ignore_index=False):
19431944
Parameters
19441945
----------
19451946
others : Series, DataFrame, np.ndarray, list-like or list-like of those
1946-
ignore_index : Boolean
1947+
ignore_index : boolean, default False
19471948
Determines whether to forcefully align with index of the caller
19481949
19491950
Returns
@@ -1955,46 +1956,44 @@ def _str_cat_los(self, input, ignore_index=False):
19551956
# once str.cat defaults to alignment, this function can be simplified;
19561957
# will not need `ignore_index` and the second boolean output anymore
19571958

1958-
from pandas.core.index import Index
1959-
from pandas.core.series import Series
1960-
from pandas.core.frame import DataFrame
1959+
from pandas import Index, Series, DataFrame
19611960

19621961
# self._orig is either Series or Index
19631962
idx = self._orig if isinstance(self._orig, Index) else self._orig.index
19641963

1965-
if isinstance(input, Series):
1966-
los = [Series(input.values, index=idx) if ignore_index else input]
1964+
if isinstance(others, Series):
1965+
los = [Series(others.values, index=idx)
1966+
if ignore_index else others]
19671967
return (los, True)
1968-
elif isinstance(input, Index):
1969-
los = [Series(input.values,
1970-
index=(idx if ignore_index else input))]
1968+
elif isinstance(others, Index):
1969+
los = [Series(others.values,
1970+
index=(idx if ignore_index else others))]
19711971
return (los, True)
1972-
elif isinstance(input, DataFrame):
1972+
elif isinstance(others, DataFrame):
19731973
if ignore_index:
19741974
# without copy, this could change (the corresponding list
19751975
# element of) "others" that was passed to str.cat
1976-
input = input.copy()
1977-
input.index = idx
1978-
return ([input[x] for x in input], True)
1979-
elif isinstance(input, np.ndarray) and input.ndim == 2:
1980-
input = DataFrame(input, index=idx)
1981-
return ([input[x] for x in input], False)
1982-
elif is_list_like(input):
1983-
input = list(input) # ensure iterators do not get read twice, etc.
1984-
if all(is_list_like(x) for x in input):
1976+
others = others.copy()
1977+
others.index = idx
1978+
return ([others[x] for x in others], True)
1979+
elif isinstance(others, np.ndarray) and others.ndim == 2:
1980+
others = DataFrame(others, index=idx)
1981+
return ([others[x] for x in others], False)
1982+
elif is_list_like(others):
1983+
others = list(others) # ensure iterators do not get read twice etc
1984+
if all(is_list_like(x) for x in others):
19851985
los = []
19861986
fuwa = False
1987-
while input:
1988-
tmp = self._str_cat_los(input.pop(0),
1989-
ignore_index=ignore_index)
1987+
while others:
1988+
tmp = self._get_series_list(others.pop(0),
1989+
ignore_index=ignore_index)
19901990
los = los + tmp[0]
19911991
fuwa = fuwa or tmp[1]
19921992
return (los, fuwa)
19931993
else:
1994-
return ([Series(input, index=idx)], False)
1995-
else:
1996-
raise ValueError('input must be Series, Index, DataFrame, '
1997-
'np.ndarrary or list-like')
1994+
return ([Series(others, index=idx)], False)
1995+
raise ValueError('others must be Series, Index, DataFrame, '
1996+
'np.ndarrary or list-like')
19981997

19991998
def cat(self, others=None, sep=None, na_rep=None, join=None):
20001999
"""
@@ -2136,11 +2135,9 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21362135
21372136
For more examples, see :ref:`here <text.concatenate>`.
21382137
"""
2139-
from pandas.core.index import Index
2140-
from pandas.core.series import Series
2141-
from pandas.core.reshape.concat import concat
2138+
from pandas import Index, Series, concat
21422139

2143-
if isinstance(others, str):
2140+
if isinstance(others, compat.string_types):
21442141
raise ValueError("Did you mean to supply a `sep` keyword?")
21452142

21462143
if isinstance(self._orig, Index):
@@ -2156,8 +2153,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21562153

21572154
try:
21582155
# turn anything in "others" into lists of Series
2159-
others, fuwa = self._str_cat_los(others,
2160-
ignore_index=(join is None))
2156+
tmp = self._get_series_list(others, ignore_index=(join is None))
2157+
others, fut_warn = tmp
21612158
except ValueError:
21622159
if join is None:
21632160
# legacy warning
@@ -2168,7 +2165,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21682165
'must all be of the same length as the '
21692166
'calling Series/Index.')
21702167

2171-
if join is None and fuwa:
2168+
if join is None and fut_warn:
21722169
warnings.warn("A future version of pandas will perform index "
21732170
"alignment when `others` is a Series/Index/"
21742171
"DataFrame (or a list-like containing one). To "

pandas/tests/test_strings.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,12 @@ def test_cat(self):
132132
exp = np.array(['aa', NA, 'bb', 'bd', 'cfoo', NA], dtype=np.object_)
133133
tm.assert_almost_equal(result, exp)
134134

135-
@pytest.mark.parametrize('ser_or_ind', ['series', 'index'])
136-
def test_str_cat(self, ser_or_ind):
135+
@pytest.mark.parametrize('series_or_index', ['series', 'index'])
136+
def test_str_cat(self, series_or_index):
137137
# test_cat above tests "str_cat" from ndarray to ndarray;
138138
# here testing "str.cat" from Series/Index to Series/Index/ndarray/list
139139
s = Index(['a', 'a', 'b', 'b', 'c', np.nan])
140-
if ser_or_ind == 'series':
140+
if series_or_index == 'series':
141141
s = Series(s)
142142
t = Index(['a', np.nan, 'b', 'd', 'foo', np.nan])
143143

@@ -156,7 +156,7 @@ def test_str_cat(self, ser_or_ind):
156156

157157
# Series/Index with Index
158158
exp = Index(['aa', 'a-', 'bb', 'bd', 'cfoo', '--'])
159-
if ser_or_ind == 'series':
159+
if series_or_index == 'series':
160160
exp = Series(exp)
161161
with tm.assert_produces_warning(expected_warning=FutureWarning):
162162
# FutureWarning to switch to alignment by default
@@ -187,19 +187,19 @@ def test_str_cat(self, ser_or_ind):
187187
with tm.assert_raises_regex(ValueError, rgx):
188188
s.str.cat(list(z))
189189

190-
@pytest.mark.parametrize('ser_or_ind', ['series', 'index'])
191-
def test_str_cat_raises_intuitive_error(self, ser_or_ind):
190+
@pytest.mark.parametrize('series_or_index', ['series', 'index'])
191+
def test_str_cat_raises_intuitive_error(self, series_or_index):
192192
# https://github.com/pandas-dev/pandas/issues/11334
193193
s = Index(['a', 'b', 'c', 'd'])
194-
if ser_or_ind == 'series':
194+
if series_or_index == 'series':
195195
s = Series(s)
196196
message = "Did you mean to supply a `sep` keyword?"
197197
with tm.assert_raises_regex(ValueError, message):
198198
s.str.cat('|')
199199
with tm.assert_raises_regex(ValueError, message):
200200
s.str.cat(' ')
201201

202-
@pytest.mark.parametrize('ser_or_ind, dtype_caller, dtype_target', [
202+
@pytest.mark.parametrize('series_or_index, dtype_caller, dtype_target', [
203203
('series', 'object', 'object'),
204204
('series', 'object', 'category'),
205205
('series', 'category', 'object'),
@@ -209,14 +209,15 @@ def test_str_cat_raises_intuitive_error(self, ser_or_ind):
209209
('index', 'category', 'object'),
210210
('index', 'category', 'category')
211211
])
212-
def test_str_cat_categorical(self, ser_or_ind, dtype_caller, dtype_target):
212+
def test_str_cat_categorical(self, series_or_index,
213+
dtype_caller, dtype_target):
213214
s = Index(['a', 'a', 'b', 'a'], dtype=dtype_caller)
214-
if ser_or_ind == 'series':
215+
if series_or_index == 'series':
215216
s = Series(s)
216217
t = Index(['b', 'a', 'b', 'c'], dtype=dtype_target)
217218

218219
exp = Index(['ab', 'aa', 'bb', 'ac'], dtype=dtype_caller)
219-
if ser_or_ind == 'series':
220+
if series_or_index == 'series':
220221
exp = Series(exp)
221222
# Series/Index with Index
222223
with tm.assert_produces_warning(expected_warning=FutureWarning):
@@ -228,16 +229,16 @@ def test_str_cat_categorical(self, ser_or_ind, dtype_caller, dtype_target):
228229
# FutureWarning to switch to alignment by default
229230
assert_series_or_index_equal(s.str.cat(Series(t)), exp)
230231

231-
@pytest.mark.parametrize('ser_or_ind', ['series', 'index'])
232-
def test_str_cat_mixed_inputs(self, ser_or_ind):
232+
@pytest.mark.parametrize('series_or_index', ['series', 'index'])
233+
def test_str_cat_mixed_inputs(self, series_or_index):
233234
s = Index(['a', 'b', 'c', 'd'])
234-
if ser_or_ind == 'series':
235+
if series_or_index == 'series':
235236
s = Series(s)
236237
t = Series(['A', 'B', 'C', 'D'])
237238
d = concat([t, Series(s)], axis=1)
238239

239240
exp = Index(['aAa', 'bBb', 'cCc', 'dDd'])
240-
if ser_or_ind == 'series':
241+
if series_or_index == 'series':
241242
exp = Series(exp)
242243
# Series/Index with DataFrame
243244
with tm.assert_produces_warning(expected_warning=FutureWarning):
@@ -288,18 +289,18 @@ def test_str_cat_mixed_inputs(self, ser_or_ind):
288289
with tm.assert_raises_regex(ValueError, rgx):
289290
s.str.cat([z, list(s)])
290291

291-
@pytest.mark.parametrize('ser_or_ind, join', [
292+
@pytest.mark.parametrize('series_or_index, join', [
292293
('series', 'left'), ('series', 'outer'),
293294
('series', 'inner'), ('series', 'right'),
294295
('index', 'left'), ('index', 'outer'),
295296
('index', 'inner'), ('index', 'right')
296297
])
297-
def test_str_cat_align_indexed(self, ser_or_ind, join):
298+
def test_str_cat_align_indexed(self, series_or_index, join):
298299
# https://github.com/pandas-dev/pandas/issues/18657
299300
s = Series(['a', 'b', 'c', 'd'], index=['a', 'b', 'c', 'd'])
300301
t = Series(['D', 'A', 'E', 'B'], index=['d', 'a', 'e', 'b'])
301302
sa, ta = s.align(t, join=join)
302-
if ser_or_ind == 'index':
303+
if series_or_index == 'index':
303304
s = Index(s)
304305
sa = Index(sa)
305306

0 commit comments

Comments
 (0)