Skip to content

Commit 4a2d55b

Browse files
reidy-pjreback
authored and committed
BUG: Convert data elements when dtype=str in Series constructor with … (#18795)
1 parent 6d2fb3e commit 4a2d55b

File tree

4 files changed

+44
-0
lines changed

4 files changed

+44
-0
lines changed

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ Conversion
281281
- Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`)
282282
- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`)
283283
- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
284+
- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`)
284285

285286

286287
Indexing

pandas/core/series.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3277,6 +3277,11 @@ def _try_cast(arr, take_fast_path):
32773277
# This is to prevent mixed-type Series from all being cast to
32783278
# NumPy string type, e.g. NaN --> '-1#IND'.
32793279
if issubclass(subarr.dtype.type, compat.string_types):
3280+
# GH 16605
3281+
# If not empty convert the data to dtype
3282+
if not isna(data).all():
3283+
data = np.array(data, dtype=dtype, copy=False)
3284+
32803285
subarr = np.array(data, dtype=object, copy=copy)
32813286

32823287
return subarr

pandas/tests/frame/test_dtypes.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,25 @@ def test_arg_for_errors_in_astype(self):
676676

677677
df.astype(np.int8, errors='ignore')
678678

679+
@pytest.mark.parametrize('input_vals', [
680+
([1, 2]),
681+
([1.0, 2.0, np.nan]),
682+
(['1', '2']),
683+
(list(pd.date_range('1/1/2011', periods=2, freq='H'))),
684+
(list(pd.date_range('1/1/2011', periods=2, freq='H',
685+
tz='US/Eastern'))),
686+
([pd.Interval(left=0, right=5)]),
687+
])
688+
def test_constructor_list_str(self, input_vals):
689+
# GH 16605
690+
# Ensure that data elements are converted to strings when
691+
# dtype is str, 'str', or 'U'
692+
693+
for dtype in ['str', str, 'U']:
694+
result = DataFrame({'A': input_vals}, dtype=dtype)
695+
expected = DataFrame({'A': input_vals}).astype({'A': dtype})
696+
assert_frame_equal(result, expected)
697+
679698

680699
class TestDataFrameDatetimeWithTZ(TestData):
681700

pandas/tests/series/test_constructors.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,25 @@ def test_constructor_list_like(self):
142142
result = Series(obj, index=[0, 1, 2])
143143
assert_series_equal(result, expected)
144144

145+
@pytest.mark.parametrize('input_vals', [
146+
([1, 2]),
147+
([1.0, 2.0, np.nan]),
148+
(['1', '2']),
149+
(list(pd.date_range('1/1/2011', periods=2, freq='H'))),
150+
(list(pd.date_range('1/1/2011', periods=2, freq='H',
151+
tz='US/Eastern'))),
152+
([pd.Interval(left=0, right=5)]),
153+
])
154+
def test_constructor_list_str(self, input_vals):
155+
# GH 16605
156+
# Ensure that data elements from a list are converted to strings
157+
# when dtype is str, 'str', or 'U'
158+
159+
for dtype in ['str', str, 'U']:
160+
result = Series(input_vals, dtype=dtype)
161+
expected = Series(input_vals).astype(dtype)
162+
assert_series_equal(result, expected)
163+
145164
def test_constructor_generator(self):
146165
gen = (i for i in range(10))
147166

0 commit comments

Comments
 (0)