From b3e8b93e6fbd52e3d876fac31c7faef582339313 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 31 Dec 2017 22:33:37 +0000 Subject: [PATCH 1/6] DataFrame.append preserves columns dtype if possible --- pandas/core/frame.py | 7 ++- pandas/tests/reshape/test_concat.py | 73 +++++++++++++++++++++++++++-- pandas/tests/reshape/test_pivot.py | 9 ++-- 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e57579ddfc05..ca20def643c2b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6113,8 +6113,11 @@ def append(self, other, ignore_index=False, verify_integrity=False): # index name will be reset index = Index([other.name], name=self.index.name) - combined_columns = self.columns.tolist() + self.columns.union( - other.index).difference(self.columns).tolist() + idx_diff = other.index.difference(self.columns) + try: + combined_columns = self.columns.append(idx_diff) + except TypeError: + combined_columns = self.columns.astype(object).append(idx_diff) other = other.reindex(combined_columns, copy=False) other = DataFrame(other.values.reshape((1, len(other))), index=index, diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index ffd37dc4b2f59..29eeef9831a2c 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,5 +1,6 @@ from warnings import catch_warnings +import datetime as dt import dateutil import numpy as np from numpy.random import randn @@ -829,12 +830,76 @@ def test_append_preserve_index_name(self): result = df1.append(df2) assert result.index.name == 'A' + @pytest.mark.parametrize("df_columns", [ + pd.RangeIndex(3), + pd.CategoricalIndex('A B C'.split()), + pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), + pd.IntervalIndex.from_breaks([0, 1, 2, 3]), + pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 3, 7, 12)]), + pd.Index([1, 2, 3]), + ]) + def 
test_append_same_columns_type(self, df_columns): + # GH18359 + + # df wider than ser + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + ser_index = df_columns[:2] + ser = pd.Series([7, 8], index=ser_index, name=2) + result = df.append(ser) + expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]], + index=[0, 1, 2], + columns=df_columns) + assert_frame_equal(result, expected) + + # ser wider than df + ser_index = df_columns + df_columns = df_columns[:2] + df = pd.DataFrame([[1, 2], [4, 5]], columns=df_columns) + ser = pd.Series([7, 8, 9], index=ser_index, name=2) + result = df.append(ser) + expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], + index=[0, 1, 2], + columns=ser_index) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize("df_columns", [ + pd.RangeIndex(3), + pd.CategoricalIndex('A B C'.split()), + pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), + pd.IntervalIndex.from_breaks([0, 1, 2, 3]), + pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 3, 7, 12)]), + pd.Index([1, 2, 3]), + ]) + def test_append_different_columns_types(self, df_columns): + # GH18359 + + # ser.index is a normal pd.Index, so result from df.append(ser) should + # be pd.Index (but this is not possible for IntervalIndex and + # MultiIndex) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + ser = pd.Series([7], index=['a'], name=2) + if isinstance(df_columns, (pd.IntervalIndex, pd.MultiIndex)): + with pytest.raises(TypeError): + df.append(ser) + else: + result = df.append(ser) + idx_diff = ser.index.difference(df_columns) + combined_columns = Index(df_columns.tolist()).append(idx_diff) + expected = pd.DataFrame([[1., 2., 3., np.nan], + [4, 5, 6, np.nan], + [np.nan, np.nan, np.nan, 7]], + index=[0, 1, 2], + columns=combined_columns) + assert_frame_equal(result, expected) + def test_append_dtype_coerce(self): # GH 4993 # appending with datetime will 
incorrectly convert datetime64 - import datetime as dt - from pandas import NaT df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)], @@ -845,7 +910,9 @@ def test_append_dtype_coerce(self): dt.datetime(2013, 1, 4, 7, 10)]], columns=['start_time', 'end_time']) - expected = concat([Series([NaT, NaT, dt.datetime(2013, 1, 3, 6, 10), + expected = concat([Series([pd.NaT, + pd.NaT, + dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 4, 7, 10)], name='end_time'), Series([dt.datetime(2013, 1, 1, 0, 0), diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 92bedbabdf2f1..1004b40bfb4c1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1540,12 +1540,14 @@ def test_crosstab_normalize(self): index=pd.Index([1, 2, 'All'], name='a', dtype='object'), - columns=pd.Index([3, 4], name='b')) + columns=pd.Index([3, 4], name='b', + dtype='object')) col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]], index=pd.Index([1, 2], name='a', dtype='object'), columns=pd.Index([3, 4, 'All'], - name='b')) + name='b', + dtype='object')) all_normal_margins = pd.DataFrame([[0.2, 0, 0.2], [0.2, 0.6, 0.8], @@ -1554,7 +1556,8 @@ def test_crosstab_normalize(self): name='a', dtype='object'), columns=pd.Index([3, 4, 'All'], - name='b')) + name='b', + dtype='object')) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index', margins=True), row_normal_margins) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns', From 093cd2c5040de8d2176c927fd544496c820735dd Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 4 Jan 2018 00:29:33 +0000 Subject: [PATCH 2/6] DataFrame.append: add parametrized tests, different dtypes --- pandas/tests/reshape/test_concat.py | 92 ++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 21 deletions(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 29eeef9831a2c..b7474f577736f 
100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,4 +1,5 @@ from warnings import catch_warnings +from itertools import combinations import datetime as dt import dateutil @@ -864,37 +865,86 @@ def test_append_same_columns_type(self, df_columns): columns=ser_index) assert_frame_equal(result, expected) - @pytest.mark.parametrize("df_columns", [ + @pytest.mark.parametrize("df_columns, series_index", combinations([ + pd.RangeIndex(3), + pd.Index([4, 5, 6]), + pd.Index([7.5, 8.5, 9.5]), + pd.CategoricalIndex('A B C'.split()), + pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 3, 7, 12)]), + ], r=2)) + def test_append_different_columns_types(self, df_columns, series_index): + # GH18359 + # see also tests 'test_append_multi_index_raises' and + # 'test_append_interval_index_raises' below + + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + ser = pd.Series([7, 8, 9], index=series_index, name=2) + + result = df.append(ser) + idx_diff = ser.index.difference(df_columns) + combined_columns = Index(df_columns.tolist()).append(idx_diff) + expected = pd.DataFrame([[1., 2., 3., np.nan, np.nan, np.nan], + [4, 5, 6, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 7, 8, 9]], + index=[0, 1, 2], + columns=combined_columns) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize("other_type", [ pd.RangeIndex(3), pd.CategoricalIndex('A B C'.split()), - pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), pd.IntervalIndex.from_breaks([0, 1, 2, 3]), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), - pd.Index([1, 2, 3]), + pd.Index([4, 5, 6]), ]) - def test_append_different_columns_types(self, df_columns): + def test_append_multi_index_raises(self, other_type): # GH18359 + # .append will raise if MultiIndex appends or is appended to a + # different index type - # ser.index is a normal 
pd.Index, so result from df.append(ser) should - # be pd.Index (but this is not possible for IntervalIndex and - # MultiIndex) - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) - ser = pd.Series([7], index=['a'], name=2) - if isinstance(df_columns, (pd.IntervalIndex, pd.MultiIndex)): - with pytest.raises(TypeError): - df.append(ser) + mi = pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]) + + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) + ser = pd.Series([7, 8, 9], index=other_type, name=2) + if isinstance(other_type, pd.IntervalIndex): + pytest.raises(ValueError, df.append, ser) else: - result = df.append(ser) - idx_diff = ser.index.difference(df_columns) - combined_columns = Index(df_columns.tolist()).append(idx_diff) - expected = pd.DataFrame([[1., 2., 3., np.nan], - [4, 5, 6, np.nan], - [np.nan, np.nan, np.nan, 7]], - index=[0, 1, 2], - columns=combined_columns) - assert_frame_equal(result, expected) + pytest.raises(TypeError, df.append, ser) + + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=other_type) + ser = pd.Series([7, 8, 9], index=mi, name=2) + with pytest.raises(TypeError): + df.append(ser) + + @pytest.mark.parametrize("other_type", [ + pd.RangeIndex(3), + pd.CategoricalIndex('A B C'.split()), + pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), + pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 3, 7, 12)]), + pd.Index([4, 5, 6]), + ]) + def test_append_interval_index_raises(self, other_type): + # GH18359 + # .append will raise if IntervalIndex appends or is appended to a + # different index type + + ii = pd.IntervalIndex.from_breaks([0, 1, 2, 3]) + + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=ii) + ser = pd.Series([7, 8, 9], index=other_type, name=2) + with pytest.raises(TypeError): + df.append(ser) + + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=other_type) + ser = pd.Series([7, 8, 9], index=ii, name=2) + with 
pytest.raises(ValueError): + df.append(ser) def test_append_dtype_coerce(self): From 4cb8a5bb4b11d3c034e3588786ec880571cc8ec7 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 14 Jan 2018 16:54:15 -0500 Subject: [PATCH 3/6] add ids to parametrization --- pandas/tests/reshape/test_concat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index b7474f577736f..285c5fee63fe4 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -840,7 +840,7 @@ def test_append_preserve_index_name(self): dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), pd.Index([1, 2, 3]), - ]) + ], ids=lambda x: x.__class__.__name__) def test_append_same_columns_type(self, df_columns): # GH18359 @@ -873,7 +873,7 @@ def test_append_same_columns_type(self, df_columns): pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), - ], r=2)) + ], r=2), ids=lambda x: x.__class__.__name__) def test_append_different_columns_types(self, df_columns, series_index): # GH18359 # see also tests 'test_append_multi_index_raises' and @@ -900,7 +900,7 @@ def test_append_different_columns_types(self, df_columns, series_index): dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), pd.Index([4, 5, 6]), - ]) + ], ids=lambda x: x.__class__.__name__) def test_append_multi_index_raises(self, other_type): # GH18359 # .append will raise if MultiIndex appends or is appended to a @@ -928,7 +928,7 @@ def test_append_multi_index_raises(self, other_type): dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), pd.Index([4, 5, 6]), - ]) + ], ids=lambda x: x.__class__.__name__) def test_append_interval_index_raises(self, other_type): # GH18359 # .append will raise if IntervalIndex appends or is appended to a From ed895a473fed7b78deb27faa4110fb463d940d2a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: 
Sun, 14 Jan 2018 17:03:00 -0500 Subject: [PATCH 4/6] add ordered categories --- pandas/tests/reshape/test_concat.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 285c5fee63fe4..453440ed5ec64 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -834,13 +834,14 @@ def test_append_preserve_index_name(self): @pytest.mark.parametrize("df_columns", [ pd.RangeIndex(3), pd.CategoricalIndex('A B C'.split()), + pd.CategoricalIndex('A B C'.split(), ordered=True), pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), pd.IntervalIndex.from_breaks([0, 1, 2, 3]), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), pd.Index([1, 2, 3]), - ], ids=lambda x: x.__class__.__name__) + ], ids=lambda x: str(x.dtype)) def test_append_same_columns_type(self, df_columns): # GH18359 @@ -870,10 +871,11 @@ def test_append_same_columns_type(self, df_columns): pd.Index([4, 5, 6]), pd.Index([7.5, 8.5, 9.5]), pd.CategoricalIndex('A B C'.split()), + # pd.CategoricalIndex('A B C'.split(), ordered=True), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), - ], r=2), ids=lambda x: x.__class__.__name__) + ], r=2), ids=lambda x: str(x.dtype)) def test_append_different_columns_types(self, df_columns, series_index): # GH18359 # see also tests 'test_append_multi_index_raises' and @@ -895,12 +897,13 @@ def test_append_different_columns_types(self, df_columns, series_index): @pytest.mark.parametrize("other_type", [ pd.RangeIndex(3), pd.CategoricalIndex('A B C'.split()), + pd.CategoricalIndex('A B C'.split(), ordered=True), pd.IntervalIndex.from_breaks([0, 1, 2, 3]), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), pd.Index([4, 5, 6]), - ], ids=lambda x: 
x.__class__.__name__) + ], ids=lambda x: str(x.dtype)) def test_append_multi_index_raises(self, other_type): # GH18359 # .append will raise if MultiIndex appends or is appended to a @@ -923,12 +926,13 @@ def test_append_multi_index_raises(self, other_type): @pytest.mark.parametrize("other_type", [ pd.RangeIndex(3), pd.CategoricalIndex('A B C'.split()), + pd.CategoricalIndex('A B C'.split(), ordered=True), pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), pd.Index([4, 5, 6]), - ], ids=lambda x: x.__class__.__name__) + ], ids=lambda x: str(x.dtype)) def test_append_interval_index_raises(self, other_type): # GH18359 # .append will raise if IntervalIndex appends or is appended to a From fd1e7a50a41893035bcc705bb3ae5a1c9f5c158c Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 31 Dec 2017 22:33:37 +0000 Subject: [PATCH 5/6] DataFrame.append preserves columns dtype if possible --- pandas/tests/reshape/test_concat.py | 66 +++++++++++------------------ 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 453440ed5ec64..01e93095f7c43 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -833,6 +833,8 @@ def test_append_preserve_index_name(self): @pytest.mark.parametrize("df_columns", [ pd.RangeIndex(3), + pd.Index([1, 2, 3]), + pd.Index(list('abc')), pd.CategoricalIndex('A B C'.split()), pd.CategoricalIndex('A B C'.split(), ordered=True), pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), @@ -840,7 +842,6 @@ def test_append_preserve_index_name(self): pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), - pd.Index([1, 2, 3]), ], ids=lambda x: str(x.dtype)) def test_append_same_columns_type(self, df_columns): # GH18359 @@ -870,16 +871,16 @@ def 
test_append_same_columns_type(self, df_columns): pd.RangeIndex(3), pd.Index([4, 5, 6]), pd.Index([7.5, 8.5, 9.5]), - pd.CategoricalIndex('A B C'.split()), - # pd.CategoricalIndex('A B C'.split(), ordered=True), + pd.Index(list('abc')), + pd.CategoricalIndex('A B C'.split(), ordered=True), + # pd.CategoricalIndex('A B C'.split()), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), ], r=2), ids=lambda x: str(x.dtype)) def test_append_different_columns_types(self, df_columns, series_index): # GH18359 - # see also tests 'test_append_multi_index_raises' and - # 'test_append_interval_index_raises' below + # see also test 'test_append_different_columns_types_raises' below df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) ser = pd.Series([7, 8, 9], index=series_index, name=2) @@ -894,60 +895,43 @@ def test_append_different_columns_types(self, df_columns, series_index): columns=combined_columns) assert_frame_equal(result, expected) - @pytest.mark.parametrize("other_type", [ - pd.RangeIndex(3), - pd.CategoricalIndex('A B C'.split()), - pd.CategoricalIndex('A B C'.split(), ordered=True), + @pytest.mark.parametrize("this_type", [ pd.IntervalIndex.from_breaks([0, 1, 2, 3]), - pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), - dt.datetime(2013, 1, 3, 6, 10), - dt.datetime(2013, 1, 3, 7, 12)]), - pd.Index([4, 5, 6]), - ], ids=lambda x: str(x.dtype)) - def test_append_multi_index_raises(self, other_type): - # GH18359 - # .append will raise if MultiIndex appends or is appended to a - # different index type - - mi = pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]) - - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) - ser = pd.Series([7, 8, 9], index=other_type, name=2) - if isinstance(other_type, pd.IntervalIndex): - pytest.raises(ValueError, df.append, ser) - else: - pytest.raises(TypeError, df.append, ser) - - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=other_type) - ser = 
pd.Series([7, 8, 9], index=mi, name=2) - with pytest.raises(TypeError): - df.append(ser) - + pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), + ]) @pytest.mark.parametrize("other_type", [ pd.RangeIndex(3), + pd.Index([4, 5, 6]), + pd.Index(list("abc")), pd.CategoricalIndex('A B C'.split()), pd.CategoricalIndex('A B C'.split(), ordered=True), + pd.IntervalIndex.from_breaks([0, 1, 2, 3]), pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), - pd.Index([4, 5, 6]), ], ids=lambda x: str(x.dtype)) - def test_append_interval_index_raises(self, other_type): + def test_append_different_columns_types_raises(self, + this_type, other_type): # GH18359 - # .append will raise if IntervalIndex appends or is appended to a - # different index type + # .append will raise if IntervalIndex/MultiIndex appends or is + # appended to a different index type + # + # see also test 'test_append_different_columns_types' above for + # appending without raising. 
- ii = pd.IntervalIndex.from_breaks([0, 1, 2, 3]) + if type(this_type) is type(other_type): + # don't test same type + return - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=ii) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=this_type) ser = pd.Series([7, 8, 9], index=other_type, name=2) with pytest.raises(TypeError): df.append(ser) df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=other_type) - ser = pd.Series([7, 8, 9], index=ii, name=2) - with pytest.raises(ValueError): + ser = pd.Series([7, 8, 9], index=this_type, name=2) + with pytest.raises(TypeError): df.append(ser) def test_append_dtype_coerce(self): From 13537ae0513c0c96b25c11cb9aae2799db2c5d48 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 21 Feb 2018 19:29:42 +0000 Subject: [PATCH 6/6] DataFrame.append simplify tests --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/tests/reshape/test_concat.py | 99 +++++++++++++---------------- 2 files changed, 45 insertions(+), 55 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 408a52e0526ee..14146b9e455b4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -380,6 +380,7 @@ Other Enhancements - :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) - :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) +- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. 
if both are ``CategoricalIndex``) (:issue:`18359`) - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) - ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) - ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 01e93095f7c43..640d09f3587fb 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,5 +1,5 @@ from warnings import catch_warnings -from itertools import combinations +from itertools import combinations, product import datetime as dt import dateutil @@ -831,35 +831,45 @@ def test_append_preserve_index_name(self): result = df1.append(df2) assert result.index.name == 'A' - @pytest.mark.parametrize("df_columns", [ + indexes_can_append = [ pd.RangeIndex(3), - pd.Index([1, 2, 3]), + pd.Index([4, 5, 6]), + pd.Index([4.5, 5.5, 6.5]), pd.Index(list('abc')), pd.CategoricalIndex('A B C'.split()), - pd.CategoricalIndex('A B C'.split(), ordered=True), - pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), - pd.IntervalIndex.from_breaks([0, 1, 2, 3]), + pd.CategoricalIndex('D E F'.split(), ordered=True), pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10), dt.datetime(2013, 1, 3, 7, 12)]), - ], ids=lambda x: str(x.dtype)) - def test_append_same_columns_type(self, df_columns): + ] + + indexes_cannot_append_with_other = [ + pd.IntervalIndex.from_breaks([0, 1, 2, 3]), + pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), + ] + + all_indexes = indexes_can_append + indexes_cannot_append_with_other + + @pytest.mark.parametrize("index", + all_indexes, + ids=lambda x: x.__class__.__name__) + def 
test_append_same_columns_type(self, index): # GH18359 # df wider than ser - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) - ser_index = df_columns[:2] + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) + ser_index = index[:2] ser = pd.Series([7, 8], index=ser_index, name=2) result = df.append(ser) expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], - columns=df_columns) + columns=index) assert_frame_equal(result, expected) # ser wider than df - ser_index = df_columns - df_columns = df_columns[:2] - df = pd.DataFrame([[1, 2], [4, 5]], columns=df_columns) + ser_index = index + index = index[:2] + df = pd.DataFrame([[1, 2], [4, 5]], columns=index) ser = pd.Series([7, 8, 9], index=ser_index, name=2) result = df.append(ser) expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], @@ -867,20 +877,13 @@ def test_append_same_columns_type(self, df_columns): columns=ser_index) assert_frame_equal(result, expected) - @pytest.mark.parametrize("df_columns, series_index", combinations([ - pd.RangeIndex(3), - pd.Index([4, 5, 6]), - pd.Index([7.5, 8.5, 9.5]), - pd.Index(list('abc')), - pd.CategoricalIndex('A B C'.split(), ordered=True), - # pd.CategoricalIndex('A B C'.split()), - pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), - dt.datetime(2013, 1, 3, 6, 10), - dt.datetime(2013, 1, 3, 7, 12)]), - ], r=2), ids=lambda x: str(x.dtype)) + @pytest.mark.parametrize("df_columns, series_index", + combinations(indexes_can_append, r=2), + ids=lambda x: x.__class__.__name__) def test_append_different_columns_types(self, df_columns, series_index): # GH18359 - # see also test 'test_append_different_columns_types_raises' below + # See also test 'test_append_different_columns_types_raises' below + # for errors raised when appending df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) ser = pd.Series([7, 8, 9], index=series_index, name=2) @@ -895,42 +898,28 @@ def test_append_different_columns_types(self, df_columns, 
series_index): columns=combined_columns) assert_frame_equal(result, expected) - @pytest.mark.parametrize("this_type", [ - pd.IntervalIndex.from_breaks([0, 1, 2, 3]), - pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), - ]) - @pytest.mark.parametrize("other_type", [ - pd.RangeIndex(3), - pd.Index([4, 5, 6]), - pd.Index(list("abc")), - pd.CategoricalIndex('A B C'.split()), - pd.CategoricalIndex('A B C'.split(), ordered=True), - pd.IntervalIndex.from_breaks([0, 1, 2, 3]), - pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), - pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), - dt.datetime(2013, 1, 3, 6, 10), - dt.datetime(2013, 1, 3, 7, 12)]), - ], ids=lambda x: str(x.dtype)) - def test_append_different_columns_types_raises(self, - this_type, other_type): + @pytest.mark.parametrize( + "index_can_append, index_cannot_append_with_other", + product(indexes_can_append, indexes_cannot_append_with_other), + ids=lambda x: x.__class__.__name__) + def test_append_different_columns_types_raises( + self, index_can_append, index_cannot_append_with_other): # GH18359 - # .append will raise if IntervalIndex/MultiIndex appends or is - # appended to a different index type + # DataFrame.append will raise if IntervalIndex/MultiIndex appends + # or is appended to a different index type # - # see also test 'test_append_different_columns_types' above for + # See also test 'test_append_different_columns_types' above for # appending without raising. 
- if type(this_type) is type(other_type): - # don't test same type - return - - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=this_type) - ser = pd.Series([7, 8, 9], index=other_type, name=2) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) + ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other, + name=2) with pytest.raises(TypeError): df.append(ser) - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=other_type) - ser = pd.Series([7, 8, 9], index=this_type, name=2) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], + columns=index_cannot_append_with_other) + ser = pd.Series([7, 8, 9], index=index_can_append, name=2) with pytest.raises(TypeError): df.append(ser)