Skip to content

Commit 6bb6860

Browse files
author
tp
committed
DataFrame.append preserves columns dtype if possible
1 parent 0e3c797 commit 6bb6860

File tree

4 files changed

+63
-6
lines changed

4 files changed

+63
-6
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ Other Enhancements
145145
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
146146
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
147147
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
148+
- :meth:`DataFrame.append` now preserves the type of the calling ``DataFrame``'s columns, when possible (:issue:`18359`).
148149

149150
.. _whatsnew_0230.api_breaking:
150151

pandas/core/frame.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -5039,8 +5039,12 @@ def append(self, other, ignore_index=False, verify_integrity=False):
50395039
# index name will be reset
50405040
index = Index([other.name], name=self.index.name)
50415041

5042-
combined_columns = self.columns.tolist() + self.columns.union(
5043-
other.index).difference(self.columns).tolist()
5042+
idx_diff = other.index.difference(self.columns)
5043+
try:
5044+
combined_columns = self.columns.append(idx_diff)
5045+
except TypeError:
5046+
lst = self.columns.tolist()
5047+
combined_columns = Index(lst).append(idx_diff)
50445048
other = other.reindex(combined_columns, copy=False)
50455049
other = DataFrame(other.values.reshape((1, len(other))),
50465050
index=index,

pandas/tests/reshape/test_concat.py

+50-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from warnings import catch_warnings
22

3+
import datetime as dt
34
import dateutil
45
import numpy as np
56
from numpy.random import randn
@@ -820,11 +821,59 @@ def test_append_preserve_index_name(self):
820821
result = df1.append(df2)
821822
assert result.index.name == 'A'
822823

824+
@pytest.mark.parametrize("df_columns", [
825+
pd.RangeIndex(3),
826+
pd.CategoricalIndex('A B C'.split()),
827+
pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
828+
pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
829+
pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
830+
dt.datetime(2013, 1, 3, 6, 10),
831+
dt.datetime(2013, 1, 3, 7, 12)]),
832+
pd.Index([1, 2, 3]),
833+
])
834+
def test_append_same_columns_type(self, df_columns):
835+
# GH18359
836+
837+
# ser.index is a plain pd.Index, so the columns of df.append(ser) should
838+
# be a plain pd.Index too (not possible for IntervalIndex and MultiIndex)
839+
if not isinstance(df_columns, (pd.IntervalIndex, pd.MultiIndex)):
840+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
841+
ser = pd.Series([7], index=['a'], name=2)
842+
result = df.append(ser)
843+
idx_diff = ser.index.difference(df_columns)
844+
combined_columns = Index(df_columns.tolist()).append(idx_diff)
845+
expected = pd.DataFrame([[1., 2., 3., np.nan],
846+
[4, 5, 6, np.nan],
847+
[np.nan, np.nan, np.nan, 7]],
848+
index=[0, 1, 2],
849+
columns=combined_columns)
850+
assert_frame_equal(result, expected)
851+
852+
# df wider than ser
853+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
854+
ser_index = df_columns[:2]
855+
ser = pd.Series([7, 8], index=ser_index, name=2)
856+
result = df.append(ser)
857+
expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]],
858+
index=[0, 1, 2],
859+
columns=df_columns)
860+
assert_frame_equal(result, expected)
861+
862+
# ser wider than df
863+
ser_index = df_columns
864+
df_columns = df_columns[:2]
865+
df = pd.DataFrame([[1, 2], [4, 5]], columns=df_columns)
866+
ser = pd.Series([7, 8, 9], index=ser_index, name=2)
867+
result = df.append(ser)
868+
expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
869+
index=[0, 1, 2],
870+
columns=ser_index)
871+
assert_frame_equal(result, expected)
872+
823873
def test_append_dtype_coerce(self):
824874

825875
# GH 4993
826876
# appending with datetime will incorrectly convert datetime64
827-
import datetime as dt
828877
from pandas import NaT
829878

830879
df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),

pandas/tests/reshape/test_pivot.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1457,12 +1457,14 @@ def test_crosstab_normalize(self):
14571457
index=pd.Index([1, 2, 'All'],
14581458
name='a',
14591459
dtype='object'),
1460-
columns=pd.Index([3, 4], name='b'))
1460+
columns=pd.Index([3, 4], name='b',
1461+
dtype='object'))
14611462
col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
14621463
index=pd.Index([1, 2], name='a',
14631464
dtype='object'),
14641465
columns=pd.Index([3, 4, 'All'],
1465-
name='b'))
1466+
name='b',
1467+
dtype='object'))
14661468

14671469
all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
14681470
[0.2, 0.6, 0.8],
@@ -1471,7 +1473,8 @@ def test_crosstab_normalize(self):
14711473
name='a',
14721474
dtype='object'),
14731475
columns=pd.Index([3, 4, 'All'],
1474-
name='b'))
1476+
name='b',
1477+
dtype='object'))
14751478
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index',
14761479
margins=True), row_normal_margins)
14771480
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns',

0 commit comments

Comments
 (0)