Skip to content

Commit 691c067

Browse files
committed
BUG: Preserve column metadata with DataFrame.astype
1 parent 52559f5 commit 691c067

File tree

3 files changed

+20
-6
lines changed

3 files changed

+20
-6
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,7 @@ Reshaping
954954
- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`)
955955
- Bug in :func:`DataFrame.iterrows`, which would infer strings not compliant with `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ as datetimes (:issue:`19671`)
956956
- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`)
957+
- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
957958

958959
Other
959960
^^^^^

pandas/core/generic.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -4436,17 +4436,21 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
44364436
results.append(col.astype(dtype[col_name], copy=copy))
44374437
else:
44384438
results.append(results.append(col.copy() if copy else col))
4439-
return pd.concat(results, axis=1, copy=False)
44404439

44414440
elif is_categorical_dtype(dtype) and self.ndim > 1:
44424441
# GH 18099: columnwise conversion to categorical
44434442
results = (self[col].astype(dtype, copy=copy) for col in self)
4444-
return pd.concat(results, axis=1, copy=False)
44454443

4446-
# else, only a single dtype is given
4447-
new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
4448-
**kwargs)
4449-
return self._constructor(new_data).__finalize__(self)
4444+
else:
4445+
# else, only a single dtype is given
4446+
new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
4447+
**kwargs)
4448+
return self._constructor(new_data).__finalize__(self)
4449+
4450+
# GH 19920: retain column metadata after concat
4451+
result = pd.concat(results, axis=1, copy=False)
4452+
result.columns = self.columns
4453+
return result
44504454

44514455
def copy(self, deep=True):
44524456
"""

pandas/tests/frame/test_dtypes.py

+9
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,15 @@ def test_astype_categoricaldtype_class_raises(self, cls):
649649
with tm.assert_raises_regex(TypeError, xpr):
650650
df['A'].astype(cls)
651651

652+
@pytest.mark.parametrize('dtype', [
653+
{100: 'float64', 200: 'uint64'}, 'category', 'float64'])
654+
def test_astype_column_metadata(self, dtype):
655+
# GH 19920
656+
columns = pd.UInt64Index([100, 200, 300], name='foo')
657+
df = DataFrame(np.arange(15).reshape(5, 3), columns=columns)
658+
df = df.astype(dtype)
659+
tm.assert_index_equal(df.columns, columns)
660+
652661
@pytest.mark.parametrize("dtype", ["M8", "m8"])
653662
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
654663
def test_astype_from_datetimelike_to_objectt(self, dtype, unit):

0 commit comments

Comments
 (0)