Skip to content

Commit 8eb2425

Browse files
authored
BUG: dtype lost in DataFrame.append (#43392)
1 parent e64784f commit 8eb2425

File tree

3 files changed

+30
-28
lines changed

3 files changed

+30
-28
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ Reshaping
406406
- Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`)
407407
- Bug in :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
408408
- Bug in :func:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :class:`pandas.CategoricalIndex` (:issue:`42425`)
409+
- Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`)
409410
-
410411

411412
Sparse

pandas/core/frame.py

+23-18
Original file line numberDiff line numberDiff line change
@@ -8966,6 +8966,7 @@ def append(
89668966
3 3
89678967
4 4
89688968
"""
8969+
combined_columns = None
89698970
if isinstance(other, (Series, dict)):
89708971
if isinstance(other, dict):
89718972
if not ignore_index:
@@ -8980,36 +8981,40 @@ def append(
89808981
index = Index([other.name], name=self.index.name)
89818982
idx_diff = other.index.difference(self.columns)
89828983
combined_columns = self.columns.append(idx_diff)
8983-
other = (
8984-
other.reindex(combined_columns, copy=False)
8985-
.to_frame()
8986-
.T.infer_objects()
8987-
.rename_axis(index.names, copy=False)
8988-
)
8989-
if not self.columns.equals(combined_columns):
8990-
self = self.reindex(columns=combined_columns)
8984+
row_df = other.to_frame().T
8985+
# infer_objects is needed for
8986+
# test_append_empty_frame_to_series_with_dateutil_tz
8987+
other = row_df.infer_objects().rename_axis(index.names, copy=False)
89918988
elif isinstance(other, list):
89928989
if not other:
89938990
pass
89948991
elif not isinstance(other[0], DataFrame):
89958992
other = DataFrame(other)
8996-
if (self.columns.get_indexer(other.columns) >= 0).all():
8997-
other = other.reindex(columns=self.columns)
89988993

89998994
from pandas.core.reshape.concat import concat
90008995

90018996
if isinstance(other, (list, tuple)):
90028997
to_concat = [self, *other]
90038998
else:
90048999
to_concat = [self, other]
9005-
return (
9006-
concat(
9007-
to_concat,
9008-
ignore_index=ignore_index,
9009-
verify_integrity=verify_integrity,
9010-
sort=sort,
9011-
)
9012-
).__finalize__(self, method="append")
9000+
9001+
result = concat(
9002+
to_concat,
9003+
ignore_index=ignore_index,
9004+
verify_integrity=verify_integrity,
9005+
sort=sort,
9006+
)
9007+
if (
9008+
combined_columns is not None
9009+
and not sort
9010+
and not combined_columns.equals(result.columns)
9011+
):
9012+
# TODO: reindexing here is a kludge bc union_indexes does not
9013+
# pass sort to index.union, xref #43375
9014+
# combined_columns.equals check is necessary for preserving dtype
9015+
# in test_crosstab_normalize
9016+
result = result.reindex(combined_columns, axis=1)
9017+
return result.__finalize__(self, method="append")
90139018

90149019
def join(
90159020
self,

pandas/tests/reshape/concat/test_append.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
import pandas.util._test_decorators as td
9-
108
import pandas as pd
119
from pandas import (
1210
DataFrame,
@@ -198,8 +196,12 @@ def test_append_same_columns_type(self, index):
198196
ser = Series([7, 8], index=ser_index, name=2)
199197
result = df.append(ser)
200198
expected = DataFrame(
201-
[[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
199+
[[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
202200
)
201+
# integer dtype is preserved for columns present in ser.index
202+
assert expected.dtypes.iloc[0].kind == "i"
203+
assert expected.dtypes.iloc[1].kind == "i"
204+
203205
tm.assert_frame_equal(result, expected)
204206

205207
# ser wider than df
@@ -301,14 +303,10 @@ def test_append_missing_column_proper_upcast(self, sort):
301303
assert appended["A"].dtype == "f8"
302304
assert appended["B"].dtype == "O"
303305

304-
# TODO(ArrayManager) DataFrame.append reindexes a Series itself (giving
305-
# float dtype) -> delay reindexing until concat_array_managers which properly
306-
# takes care of all-null dtype inference
307-
@td.skip_array_manager_not_yet_implemented
308306
def test_append_empty_frame_to_series_with_dateutil_tz(self):
309307
# GH 23682
310308
date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
311-
ser = Series({"date": date, "a": 1.0, "b": 2.0})
309+
ser = Series({"a": 1.0, "b": 2.0, "date": date})
312310
df = DataFrame(columns=["c", "d"])
313311
result_a = df.append(ser, ignore_index=True)
314312
expected = DataFrame(
@@ -327,8 +325,6 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
327325
result_b = result_a.append(ser, ignore_index=True)
328326
tm.assert_frame_equal(result_b, expected)
329327

330-
# column order is different
331-
expected = expected[["c", "d", "date", "a", "b"]]
332328
result = df.append([ser, ser], ignore_index=True)
333329
tm.assert_frame_equal(result, expected)
334330

0 commit comments

Comments
 (0)