Skip to content

Commit 1081d90

Browse files
BranYang authored and TomAugspurger committed
BUG: Fix Series doesn't work in pd.astype(). Now treat Series as dict. (pandas-dev#16725)
(cherry picked from commit 6ae92a8)
1 parent 8122288 commit 1081d90

File tree

4 files changed

+44
-17
lines changed

4 files changed

+44
-17
lines changed

doc/source/whatsnew/v0.20.3.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ Conversion
4545
^^^^^^^^^^
4646

4747
- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`)
48-
- Bug in Series construction when passing a Series with ``dtype='category'`` (:issue:`16524`).
48+
- Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`).
49+
- Bug in ``DataFrame.astype()`` when passing a ``Series`` as the ``dtype`` kwarg (:issue:`16717`).
4950

5051
Indexing
5152
^^^^^^^^

pandas/core/generic.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3379,12 +3379,12 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
33793379
-------
33803380
casted : type of caller
33813381
"""
3382-
if isinstance(dtype, collections.Mapping):
3382+
if is_dict_like(dtype):
33833383
if self.ndim == 1: # i.e. Series
3384-
if len(dtype) > 1 or list(dtype.keys())[0] != self.name:
3384+
if len(dtype) > 1 or self.name not in dtype:
33853385
raise KeyError('Only the Series name can be used for '
33863386
'the key in Series dtype mappings.')
3387-
new_type = list(dtype.values())[0]
3387+
new_type = dtype[self.name]
33883388
return self.astype(new_type, copy, errors, **kwargs)
33893389
elif self.ndim > 2:
33903390
raise NotImplementedError(

pandas/tests/frame/test_dtypes.py

+23-8
Original file line numberDiff line numberDiff line change
@@ -442,8 +442,9 @@ def test_astype_str(self):
442442
expected = DataFrame(['1.12345678901'])
443443
assert_frame_equal(result, expected)
444444

445-
def test_astype_dict(self):
446-
# GH7271
445+
@pytest.mark.parametrize("dtype_class", [dict, Series])
446+
def test_astype_dict_like(self, dtype_class):
447+
# GH7271 & GH16717
447448
a = Series(date_range('2010-01-04', periods=5))
448449
b = Series(range(5))
449450
c = Series([0.0, 0.2, 0.4, 0.6, 0.8])
@@ -452,7 +453,8 @@ def test_astype_dict(self):
452453
original = df.copy(deep=True)
453454

454455
# change type of a subset of columns
455-
result = df.astype({'b': 'str', 'd': 'float32'})
456+
dt1 = dtype_class({'b': 'str', 'd': 'float32'})
457+
result = df.astype(dt1)
456458
expected = DataFrame({
457459
'a': a,
458460
'b': Series(['0', '1', '2', '3', '4']),
@@ -461,7 +463,8 @@ def test_astype_dict(self):
461463
assert_frame_equal(result, expected)
462464
assert_frame_equal(df, original)
463465

464-
result = df.astype({'b': np.float32, 'c': 'float32', 'd': np.float64})
466+
dt2 = dtype_class({'b': np.float32, 'c': 'float32', 'd': np.float64})
467+
result = df.astype(dt2)
465468
expected = DataFrame({
466469
'a': a,
467470
'b': Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype='float32'),
@@ -471,19 +474,31 @@ def test_astype_dict(self):
471474
assert_frame_equal(df, original)
472475

473476
# change all columns
474-
assert_frame_equal(df.astype({'a': str, 'b': str, 'c': str, 'd': str}),
477+
dt3 = dtype_class({'a': str, 'b': str, 'c': str, 'd': str})
478+
assert_frame_equal(df.astype(dt3),
475479
df.astype(str))
476480
assert_frame_equal(df, original)
477481

478482
# error should be raised when using something other than column labels
479483
# in the keys of the dtype dict
480-
pytest.raises(KeyError, df.astype, {'b': str, 2: str})
481-
pytest.raises(KeyError, df.astype, {'e': str})
484+
dt4 = dtype_class({'b': str, 2: str})
485+
dt5 = dtype_class({'e': str})
486+
pytest.raises(KeyError, df.astype, dt4)
487+
pytest.raises(KeyError, df.astype, dt5)
482488
assert_frame_equal(df, original)
483489

484490
# if the dtypes provided are the same as the original dtypes, the
485491
# resulting DataFrame should be the same as the original DataFrame
486-
equiv = df.astype({col: df[col].dtype for col in df.columns})
492+
dt6 = dtype_class({col: df[col].dtype for col in df.columns})
493+
equiv = df.astype(dt6)
494+
assert_frame_equal(df, equiv)
495+
assert_frame_equal(df, original)
496+
497+
# GH 16717
498+
# if the dtypes provided are empty, the resulting DataFrame
499+
# should be the same as the original DataFrame
500+
dt7 = dtype_class({})
501+
result = df.astype(dt7)
487502
assert_frame_equal(df, equiv)
488503
assert_frame_equal(df, original)
489504

pandas/tests/series/test_dtypes.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -152,24 +152,35 @@ def test_astype_unicode(self):
152152
reload(sys) # noqa
153153
sys.setdefaultencoding(former_encoding)
154154

155-
def test_astype_dict(self):
155+
@pytest.mark.parametrize("dtype_class", [dict, Series])
156+
def test_astype_dict_like(self, dtype_class):
156157
# see gh-7271
157158
s = Series(range(0, 10, 2), name='abc')
158159

159-
result = s.astype({'abc': str})
160+
dt1 = dtype_class({'abc': str})
161+
result = s.astype(dt1)
160162
expected = Series(['0', '2', '4', '6', '8'], name='abc')
161163
tm.assert_series_equal(result, expected)
162164

163-
result = s.astype({'abc': 'float64'})
165+
dt2 = dtype_class({'abc': 'float64'})
166+
result = s.astype(dt2)
164167
expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64',
165168
name='abc')
166169
tm.assert_series_equal(result, expected)
167170

171+
dt3 = dtype_class({'abc': str, 'def': str})
168172
with pytest.raises(KeyError):
169-
s.astype({'abc': str, 'def': str})
173+
s.astype(dt3)
170174

175+
dt4 = dtype_class({0: str})
171176
with pytest.raises(KeyError):
172-
s.astype({0: str})
177+
s.astype(dt4)
178+
179+
# GH16717
180+
# if the dtype mapping provided is empty, astype should raise a KeyError
181+
dt5 = dtype_class({})
182+
with pytest.raises(KeyError):
183+
s.astype(dt5)
173184

174185
def test_astype_generic_timestamp_deprecated(self):
175186
# see gh-15524

0 commit comments

Comments
 (0)