Skip to content

Commit 1e0d977

Browse files
author
tp
committed
DataFrame.append preserves columns dtype if possible
1 parent 8347ff8 commit 1e0d977

File tree

4 files changed

+82
-8
lines changed

4 files changed

+82
-8
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ Other Enhancements
202202
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
203203
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
204204
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
205+
- :meth:`DataFrame.append` now preserves the index type of the calling DataFrame's columns (e.g. ``CategoricalIndex``), when possible (:issue:`18359`).
205206

206207
.. _whatsnew_0230.api_breaking:
207208

pandas/core/frame.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -5036,8 +5036,11 @@ def append(self, other, ignore_index=False, verify_integrity=False):
50365036
# index name will be reset
50375037
index = Index([other.name], name=self.index.name)
50385038

5039-
combined_columns = self.columns.tolist() + self.columns.union(
5040-
other.index).difference(self.columns).tolist()
5039+
idx_diff = other.index.difference(self.columns)
5040+
try:
5041+
combined_columns = self.columns.append(idx_diff)
5042+
except TypeError:
5043+
combined_columns = self.columns.astype(object).append(idx_diff)
50415044
other = other.reindex(combined_columns, copy=False)
50425045
other = DataFrame(other.values.reshape((1, len(other))),
50435046
index=index,

pandas/tests/reshape/test_concat.py

+70-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from warnings import catch_warnings
22

3+
import datetime as dt
34
import dateutil
45
import numpy as np
56
from numpy.random import randn
@@ -829,12 +830,76 @@ def test_append_preserve_index_name(self):
829830
result = df1.append(df2)
830831
assert result.index.name == 'A'
831832

833+
@pytest.mark.parametrize("df_columns", [
834+
pd.RangeIndex(3),
835+
pd.CategoricalIndex('A B C'.split()),
836+
pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
837+
pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
838+
pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
839+
dt.datetime(2013, 1, 3, 6, 10),
840+
dt.datetime(2013, 1, 3, 7, 12)]),
841+
pd.Index([1, 2, 3]),
842+
])
843+
def test_append_same_columns_type(self, df_columns):
844+
# GH18359
845+
846+
# df wider than ser
847+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
848+
ser_index = df_columns[:2]
849+
ser = pd.Series([7, 8], index=ser_index, name=2)
850+
result = df.append(ser)
851+
expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]],
852+
index=[0, 1, 2],
853+
columns=df_columns)
854+
assert_frame_equal(result, expected)
855+
856+
# ser wider than df
857+
ser_index = df_columns
858+
df_columns = df_columns[:2]
859+
df = pd.DataFrame([[1, 2], [4, 5]], columns=df_columns)
860+
ser = pd.Series([7, 8, 9], index=ser_index, name=2)
861+
result = df.append(ser)
862+
expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
863+
index=[0, 1, 2],
864+
columns=ser_index)
865+
assert_frame_equal(result, expected)
866+
867+
@pytest.mark.parametrize("df_columns", [
868+
pd.RangeIndex(3),
869+
pd.CategoricalIndex('A B C'.split()),
870+
pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
871+
pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
872+
pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
873+
dt.datetime(2013, 1, 3, 6, 10),
874+
dt.datetime(2013, 1, 3, 7, 12)]),
875+
pd.Index([1, 2, 3]),
876+
])
877+
def test_append_different_columns_types(self, df_columns):
878+
# GH18359
879+
880+
# ser.index is a plain pd.Index, so the columns of df.append(ser)
881+
# should fall back to a plain pd.Index (this is not possible for
882+
# IntervalIndex and MultiIndex, where df.append raises TypeError)
883+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
884+
ser = pd.Series([7], index=['a'], name=2)
885+
if isinstance(df_columns, (pd.IntervalIndex, pd.MultiIndex)):
886+
with pytest.raises(TypeError):
887+
df.append(ser)
888+
else:
889+
result = df.append(ser)
890+
idx_diff = ser.index.difference(df_columns)
891+
combined_columns = Index(df_columns.tolist()).append(idx_diff)
892+
expected = pd.DataFrame([[1., 2., 3., np.nan],
893+
[4, 5, 6, np.nan],
894+
[np.nan, np.nan, np.nan, 7]],
895+
index=[0, 1, 2],
896+
columns=combined_columns)
897+
assert_frame_equal(result, expected)
898+
832899
def test_append_dtype_coerce(self):
833900

834901
# GH 4993
835902
# appending with datetime will incorrectly convert datetime64
836-
import datetime as dt
837-
from pandas import NaT
838903

839904
df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
840905
dt.datetime(2013, 1, 2, 0, 0)],
@@ -845,7 +910,9 @@ def test_append_dtype_coerce(self):
845910
dt.datetime(2013, 1, 4, 7, 10)]],
846911
columns=['start_time', 'end_time'])
847912

848-
expected = concat([Series([NaT, NaT, dt.datetime(2013, 1, 3, 6, 10),
913+
expected = concat([Series([pd.NaT,
914+
pd.NaT,
915+
dt.datetime(2013, 1, 3, 6, 10),
849916
dt.datetime(2013, 1, 4, 7, 10)],
850917
name='end_time'),
851918
Series([dt.datetime(2013, 1, 1, 0, 0),

pandas/tests/reshape/test_pivot.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1457,12 +1457,14 @@ def test_crosstab_normalize(self):
14571457
index=pd.Index([1, 2, 'All'],
14581458
name='a',
14591459
dtype='object'),
1460-
columns=pd.Index([3, 4], name='b'))
1460+
columns=pd.Index([3, 4], name='b',
1461+
dtype='object'))
14611462
col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
14621463
index=pd.Index([1, 2], name='a',
14631464
dtype='object'),
14641465
columns=pd.Index([3, 4, 'All'],
1465-
name='b'))
1466+
name='b',
1467+
dtype='object'))
14661468

14671469
all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
14681470
[0.2, 0.6, 0.8],
@@ -1471,7 +1473,8 @@ def test_crosstab_normalize(self):
14711473
name='a',
14721474
dtype='object'),
14731475
columns=pd.Index([3, 4, 'All'],
1474-
name='b'))
1476+
name='b',
1477+
dtype='object'))
14751478
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index',
14761479
margins=True), row_normal_margins)
14771480
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns',

0 commit comments

Comments
 (0)