Skip to content

Commit b3e8b93

Browse files
author
tp
committed
DataFrame.append preserves columns dtype if possible
1 parent 3885ced commit b3e8b93

File tree

3 files changed

+81
-8
lines changed

3 files changed

+81
-8
lines changed

pandas/core/frame.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -6113,8 +6113,11 @@ def append(self, other, ignore_index=False, verify_integrity=False):
61136113
# index name will be reset
61146114
index = Index([other.name], name=self.index.name)
61156115

6116-
combined_columns = self.columns.tolist() + self.columns.union(
6117-
other.index).difference(self.columns).tolist()
6116+
idx_diff = other.index.difference(self.columns)
6117+
try:
6118+
combined_columns = self.columns.append(idx_diff)
6119+
except TypeError:
6120+
combined_columns = self.columns.astype(object).append(idx_diff)
61186121
other = other.reindex(combined_columns, copy=False)
61196122
other = DataFrame(other.values.reshape((1, len(other))),
61206123
index=index,

pandas/tests/reshape/test_concat.py

+70-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from warnings import catch_warnings
22

3+
import datetime as dt
34
import dateutil
45
import numpy as np
56
from numpy.random import randn
@@ -829,12 +830,76 @@ def test_append_preserve_index_name(self):
829830
result = df1.append(df2)
830831
assert result.index.name == 'A'
831832

833+
@pytest.mark.parametrize("df_columns", [
834+
pd.RangeIndex(3),
835+
pd.CategoricalIndex('A B C'.split()),
836+
pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
837+
pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
838+
pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
839+
dt.datetime(2013, 1, 3, 6, 10),
840+
dt.datetime(2013, 1, 3, 7, 12)]),
841+
pd.Index([1, 2, 3]),
842+
])
843+
def test_append_same_columns_type(self, df_columns):
844+
# GH18359
845+
846+
# df wider than ser
847+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
848+
ser_index = df_columns[:2]
849+
ser = pd.Series([7, 8], index=ser_index, name=2)
850+
result = df.append(ser)
851+
expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]],
852+
index=[0, 1, 2],
853+
columns=df_columns)
854+
assert_frame_equal(result, expected)
855+
856+
# ser wider than df
857+
ser_index = df_columns
858+
df_columns = df_columns[:2]
859+
df = pd.DataFrame([[1, 2], [4, 5]], columns=df_columns)
860+
ser = pd.Series([7, 8, 9], index=ser_index, name=2)
861+
result = df.append(ser)
862+
expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
863+
index=[0, 1, 2],
864+
columns=ser_index)
865+
assert_frame_equal(result, expected)
866+
867+
@pytest.mark.parametrize("df_columns", [
868+
pd.RangeIndex(3),
869+
pd.CategoricalIndex('A B C'.split()),
870+
pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
871+
pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
872+
pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
873+
dt.datetime(2013, 1, 3, 6, 10),
874+
dt.datetime(2013, 1, 3, 7, 12)]),
875+
pd.Index([1, 2, 3]),
876+
])
877+
def test_append_different_columns_types(self, df_columns):
878+
# GH18359
879+
880+
# ser.index is a normal pd.Index, so the columns of df.append(ser) should
881+
# be pd.Index (but this is not possible for IntervalIndex and
882+
# MultiIndex)
883+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
884+
ser = pd.Series([7], index=['a'], name=2)
885+
if isinstance(df_columns, (pd.IntervalIndex, pd.MultiIndex)):
886+
with pytest.raises(TypeError):
887+
df.append(ser)
888+
else:
889+
result = df.append(ser)
890+
idx_diff = ser.index.difference(df_columns)
891+
combined_columns = Index(df_columns.tolist()).append(idx_diff)
892+
expected = pd.DataFrame([[1., 2., 3., np.nan],
893+
[4, 5, 6, np.nan],
894+
[np.nan, np.nan, np.nan, 7]],
895+
index=[0, 1, 2],
896+
columns=combined_columns)
897+
assert_frame_equal(result, expected)
898+
832899
def test_append_dtype_coerce(self):
833900

834901
# GH 4993
835902
# appending with datetime will incorrectly convert datetime64
836-
import datetime as dt
837-
from pandas import NaT
838903

839904
df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
840905
dt.datetime(2013, 1, 2, 0, 0)],
@@ -845,7 +910,9 @@ def test_append_dtype_coerce(self):
845910
dt.datetime(2013, 1, 4, 7, 10)]],
846911
columns=['start_time', 'end_time'])
847912

848-
expected = concat([Series([NaT, NaT, dt.datetime(2013, 1, 3, 6, 10),
913+
expected = concat([Series([pd.NaT,
914+
pd.NaT,
915+
dt.datetime(2013, 1, 3, 6, 10),
849916
dt.datetime(2013, 1, 4, 7, 10)],
850917
name='end_time'),
851918
Series([dt.datetime(2013, 1, 1, 0, 0),

pandas/tests/reshape/test_pivot.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1540,12 +1540,14 @@ def test_crosstab_normalize(self):
15401540
index=pd.Index([1, 2, 'All'],
15411541
name='a',
15421542
dtype='object'),
1543-
columns=pd.Index([3, 4], name='b'))
1543+
columns=pd.Index([3, 4], name='b',
1544+
dtype='object'))
15441545
col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
15451546
index=pd.Index([1, 2], name='a',
15461547
dtype='object'),
15471548
columns=pd.Index([3, 4, 'All'],
1548-
name='b'))
1549+
name='b',
1550+
dtype='object'))
15491551

15501552
all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
15511553
[0.2, 0.6, 0.8],
@@ -1554,7 +1556,8 @@ def test_crosstab_normalize(self):
15541556
name='a',
15551557
dtype='object'),
15561558
columns=pd.Index([3, 4, 'All'],
1557-
name='b'))
1559+
name='b',
1560+
dtype='object'))
15581561
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index',
15591562
margins=True), row_normal_margins)
15601563
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns',

0 commit comments

Comments
 (0)