Skip to content

Commit ead911d

Browse files
author
tp
committed
DataFrame.append preserves columns dtype if possible
1 parent 821028f commit ead911d

File tree

4 files changed

+82
-8
lines changed

4 files changed

+82
-8
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,7 @@ Other Enhancements
171171
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
172172
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
173173
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
174+
- :meth:`DataFrame.append` now preserves the type of the calling dataframe's columns, when possible (:issue:`18359`)
174175

175176
.. _whatsnew_0230.api_breaking:
176177

pandas/core/frame.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -5037,8 +5037,11 @@ def append(self, other, ignore_index=False, verify_integrity=False):
50375037
# index name will be reset
50385038
index = Index([other.name], name=self.index.name)
50395039

5040-
combined_columns = self.columns.tolist() + self.columns.union(
5041-
other.index).difference(self.columns).tolist()
5040+
idx_diff = other.index.difference(self.columns)
5041+
try:
5042+
combined_columns = self.columns.append(idx_diff)
5043+
except TypeError:
5044+
combined_columns = self.columns.astype(object).append(idx_diff)
50425045
other = other.reindex(combined_columns, copy=False)
50435046
other = DataFrame(other.values.reshape((1, len(other))),
50445047
index=index,

pandas/tests/reshape/test_concat.py

+70-3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from warnings import catch_warnings
22

3+
import datetime as dt
34
import dateutil
45
import numpy as np
56
from numpy.random import randn
@@ -820,12 +821,76 @@ def test_append_preserve_index_name(self):
820821
result = df1.append(df2)
821822
assert result.index.name == 'A'
822823

824+
@pytest.mark.parametrize("df_columns", [
825+
pd.RangeIndex(3),
826+
pd.CategoricalIndex('A B C'.split()),
827+
pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
828+
pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
829+
pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
830+
dt.datetime(2013, 1, 3, 6, 10),
831+
dt.datetime(2013, 1, 3, 7, 12)]),
832+
pd.Index([1, 2, 3]),
833+
])
834+
def test_append_same_columns_type(self, df_columns):
835+
# GH18359
836+
837+
# df wider than ser
838+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
839+
ser_index = df_columns[:2]
840+
ser = pd.Series([7, 8], index=ser_index, name=2)
841+
result = df.append(ser)
842+
expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]],
843+
index=[0, 1, 2],
844+
columns=df_columns)
845+
assert_frame_equal(result, expected)
846+
847+
# ser wider than df
848+
ser_index = df_columns
849+
df_columns = df_columns[:2]
850+
df = pd.DataFrame([[1, 2], [4, 5]], columns=df_columns)
851+
ser = pd.Series([7, 8, 9], index=ser_index, name=2)
852+
result = df.append(ser)
853+
expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
854+
index=[0, 1, 2],
855+
columns=ser_index)
856+
assert_frame_equal(result, expected)
857+
858+
@pytest.mark.parametrize("df_columns", [
859+
pd.RangeIndex(3),
860+
pd.CategoricalIndex('A B C'.split()),
861+
pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
862+
pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
863+
pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
864+
dt.datetime(2013, 1, 3, 6, 10),
865+
dt.datetime(2013, 1, 3, 7, 12)]),
866+
pd.Index([1, 2, 3]),
867+
])
868+
def test_append_different_columns_types(self, df_columns):
869+
# GH18359
870+
871+
# ser.index is a normal pd.Index, so result from df.append(ser) should
872+
# be pd.Index (but this is not possible for IntervalIndex and
873+
# MultiIndex)
874+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
875+
ser = pd.Series([7], index=['a'], name=2)
876+
if isinstance(df_columns, (pd.IntervalIndex, pd.MultiIndex)):
877+
with pytest.raises(TypeError):
878+
df.append(ser)
879+
else:
880+
result = df.append(ser)
881+
idx_diff = ser.index.difference(df_columns)
882+
combined_columns = Index(df_columns.tolist()).append(idx_diff)
883+
expected = pd.DataFrame([[1., 2., 3., np.nan],
884+
[4, 5, 6, np.nan],
885+
[np.nan, np.nan, np.nan, 7]],
886+
index=[0, 1, 2],
887+
columns=combined_columns)
888+
assert_frame_equal(result, expected)
889+
823890
def test_append_dtype_coerce(self):
824891

825892
# GH 4993
826893
# appending with datetime will incorrectly convert datetime64
827-
import datetime as dt
828-
from pandas import NaT
829894

830895
df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
831896
dt.datetime(2013, 1, 2, 0, 0)],
@@ -836,7 +901,9 @@ def test_append_dtype_coerce(self):
836901
dt.datetime(2013, 1, 4, 7, 10)]],
837902
columns=['start_time', 'end_time'])
838903

839-
expected = concat([Series([NaT, NaT, dt.datetime(2013, 1, 3, 6, 10),
904+
expected = concat([Series([pd.NaT,
905+
pd.NaT,
906+
dt.datetime(2013, 1, 3, 6, 10),
840907
dt.datetime(2013, 1, 4, 7, 10)],
841908
name='end_time'),
842909
Series([dt.datetime(2013, 1, 1, 0, 0),

pandas/tests/reshape/test_pivot.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -1457,12 +1457,14 @@ def test_crosstab_normalize(self):
14571457
index=pd.Index([1, 2, 'All'],
14581458
name='a',
14591459
dtype='object'),
1460-
columns=pd.Index([3, 4], name='b'))
1460+
columns=pd.Index([3, 4], name='b',
1461+
dtype='object'))
14611462
col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
14621463
index=pd.Index([1, 2], name='a',
14631464
dtype='object'),
14641465
columns=pd.Index([3, 4, 'All'],
1465-
name='b'))
1466+
name='b',
1467+
dtype='object'))
14661468

14671469
all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
14681470
[0.2, 0.6, 0.8],
@@ -1471,7 +1473,8 @@ def test_crosstab_normalize(self):
14711473
name='a',
14721474
dtype='object'),
14731475
columns=pd.Index([3, 4, 'All'],
1474-
name='b'))
1476+
name='b',
1477+
dtype='object'))
14751478
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index',
14761479
margins=True), row_normal_margins)
14771480
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns',

0 commit comments

Comments
 (0)