diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4654ceee9896b..d171b1a486162 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3656,57 +3656,6 @@ def infer(x): #---------------------------------------------------------------------- # Merging / joining methods - def append(self, other, ignore_index=False, verify_integrity=False): - """ - Append columns of other to end of this frame's columns and index, - returning a new object. Columns not in this frame are added as new - columns. - - Parameters - ---------- - other : DataFrame or list of Series/dict-like objects - ignore_index : boolean, default False - If True do not use the index labels. Useful for gluing together - record arrays - verify_integrity : boolean, default False - If True, raise ValueError on creating index with duplicates - - Notes - ----- - If a list of dict is passed and the keys are all contained in the - DataFrame's index, the order of the columns in the resulting DataFrame - will be unchanged - - Returns - ------- - appended : DataFrame - """ - if isinstance(other, (Series, dict)): - if isinstance(other, dict): - other = Series(other) - if other.name is None and not ignore_index: - raise TypeError('Can only append a Series if ' - 'ignore_index=True') - - index = None if other.name is None else [other.name] - combined_columns = self.columns.tolist() + (self.columns | other.index).difference(self.columns).tolist() - other = other.reindex(combined_columns, copy=False) - other = DataFrame(other.values.reshape((1, len(other))), - index=index, columns=combined_columns).convert_objects() - if not self.columns.equals(combined_columns): - self = self.reindex(columns=combined_columns) - elif isinstance(other, list) and not isinstance(other[0], DataFrame): - other = DataFrame(other) - if (self.columns.get_indexer(other.columns) >= 0).all(): - other = other.ix[:, self.columns] - - from pandas.tools.merge import concat - if isinstance(other, (list, tuple)): - to_concat = [self] + 
other - else: - to_concat = [self, other] - return concat(to_concat, ignore_index=ignore_index, - verify_integrity=verify_integrity) def join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f4192e5761d7a..59a0fb282ef76 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3618,6 +3618,41 @@ def _tz_localize(ax, tz, ambiguous): result.set_axis(axis,ax) return result.__finalize__(self) + #---------------------------------------------------------------------- + # Merging / joining methods + + def append(self, other, ignore_index=False, verify_integrity=False, axis=0): + if self.ndim >= 3: + msg = "append is not implemented on Panel or PanelND objects." + raise NotImplementedError(msg) + + if isinstance(other, (pd.Series, dict)): + if isinstance(other, dict): + other = pd.Series(other) + if other.name is None and not ignore_index: + raise TypeError('Can only append a Series if ' + 'ignore_index=True') + + index = None if other.name is None else [other.name] + combined_columns = self.columns.tolist() + ((self.columns | other.index) - self.columns).tolist() + other = other.reindex(combined_columns, copy=False) + other = pd.DataFrame(other.values.reshape((1, len(other))), + index=index, columns=combined_columns).convert_objects() + if not self.columns.equals(combined_columns): + self = self.reindex(columns=combined_columns) + elif isinstance(other, list) and not isinstance(other[0], pd.DataFrame): + other = pd.DataFrame(other) + if (self.columns.get_indexer(other.columns) >= 0).all(): + other = other.ix[:, self.columns] + + from pandas.tools.merge import concat + if isinstance(other, (list, tuple)): + to_concat = [self] + other + else: + to_concat = [self, other] + return concat(to_concat, ignore_index=ignore_index, axis=axis, + verify_integrity=verify_integrity) + #---------------------------------------------------------------------- # Numeric Methods def
abs(self): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b4b8e4263ec78..0364844751671 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -6606,128 +6606,6 @@ def test_convert_objects_no_conversion(self): mixed2 = mixed1.convert_objects() assert_frame_equal(mixed1, mixed2) - def test_append_series_dict(self): - df = DataFrame(np.random.randn(5, 4), - columns=['foo', 'bar', 'baz', 'qux']) - - series = df.ix[4] - with assertRaisesRegexp(ValueError, 'Indexes have overlapping values'): - df.append(series, verify_integrity=True) - series.name = None - with assertRaisesRegexp(TypeError, 'Can only append a Series if ' - 'ignore_index=True'): - df.append(series, verify_integrity=True) - - result = df.append(series[::-1], ignore_index=True) - expected = df.append(DataFrame({0: series[::-1]}, index=df.columns).T, - ignore_index=True) - assert_frame_equal(result, expected) - - # dict - result = df.append(series.to_dict(), ignore_index=True) - assert_frame_equal(result, expected) - - result = df.append(series[::-1][:3], ignore_index=True) - expected = df.append(DataFrame({0: series[::-1][:3]}).T, - ignore_index=True) - assert_frame_equal(result, expected.ix[:, result.columns]) - - # can append when name set - row = df.ix[4] - row.name = 5 - result = df.append(row) - expected = df.append(df[-1:], ignore_index=True) - assert_frame_equal(result, expected) - - def test_append_list_of_series_dicts(self): - df = DataFrame(np.random.randn(5, 4), - columns=['foo', 'bar', 'baz', 'qux']) - - dicts = [x.to_dict() for idx, x in df.iterrows()] - - result = df.append(dicts, ignore_index=True) - expected = df.append(df, ignore_index=True) - assert_frame_equal(result, expected) - - # different columns - dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4}, - {'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}] - result = df.append(dicts, ignore_index=True) - expected = df.append(DataFrame(dicts), ignore_index=True) - assert_frame_equal(result, 
expected) - - def test_append_empty_dataframe(self): - - # Empty df append empty df - df1 = DataFrame([]) - df2 = DataFrame([]) - result = df1.append(df2) - expected = df1.copy() - assert_frame_equal(result, expected) - - # Non-empty df append empty df - df1 = DataFrame(np.random.randn(5, 2)) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - assert_frame_equal(result, expected) - - # Empty df with columns append empty df - df1 = DataFrame(columns=['bar', 'foo']) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - assert_frame_equal(result, expected) - - # Non-Empty df with columns append empty df - df1 = DataFrame(np.random.randn(5, 2), columns=['bar', 'foo']) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - assert_frame_equal(result, expected) - - def test_append_dtypes(self): - - # GH 5754 - # row appends of different dtypes (so need to do by-item) - # can sometimes infer the correct type - - df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(5)) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - assert_frame_equal(result, expected) - - df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) - df2 = DataFrame({ 'bar' : 'foo' }, index=lrange(1,2)) - result = df1.append(df2) - expected = DataFrame({ 'bar' : [ Timestamp('20130101'), 'foo' ]}) - assert_frame_equal(result, expected) - - df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) - df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2)) - result = df1.append(df2) - expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') }) - assert_frame_equal(result, expected) - - df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) - df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2), dtype=object) - result = df1.append(df2) - expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') }) - assert_frame_equal(result, 
expected) - - df1 = DataFrame({ 'bar' : np.nan }, index=lrange(1)) - df2 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1,2)) - result = df1.append(df2) - expected = DataFrame({ 'bar' : Series([ np.nan, Timestamp('20130101')] ,dtype='M8[ns]') }) - assert_frame_equal(result, expected) - - df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) - df2 = DataFrame({ 'bar' : 1 }, index=lrange(1,2), dtype=object) - result = df1.append(df2) - expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), 1 ]) }) - assert_frame_equal(result, expected) - def test_asfreq(self): offset_monthly = self.tsframe.asfreq(datetools.bmonthEnd) rule_monthly = self.tsframe.asfreq('BM') diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 0734da1ab09aa..e90f013e34a85 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -6,7 +6,7 @@ from numpy import nan import pandas as pd -from pandas import (Index, Series, DataFrame, Panel, +from pandas import (Index, Series, DataFrame, Panel, Timestamp, isnull, notnull, date_range, period_range) from pandas.core.index import Index, MultiIndex @@ -18,6 +18,7 @@ assert_frame_equal, assert_panel_equal, assert_almost_equal, + assertRaisesRegexp, ensure_clean) import pandas.util.testing as tm @@ -363,6 +364,9 @@ def setUp(self): self.series = tm.makeStringSeries() self.series.name = 'series' + self.objSeries = tm.makeObjectSeries() + self.objSeries.name = 'objects' + def test_rename_mi(self): s = Series([11,21,31], index=MultiIndex.from_tuples([("A",x) for x in ["a","B","c"]])) @@ -715,6 +719,32 @@ def test_describe_none(self): assert_series_equal(noneSeries.describe(), Series([0, 0], index=['count', 'unique'])) + # GH8295 + # ENH: allow axis argument to append / move append code to generic.py + # test cases for Panel and DataFrame are in TestNDFrame class. 
+ def test_append_preserve_name(self): + result = self.ts[:5].append(self.ts[5:]) + self.assertEqual(result.name, self.ts.name) + + def test_append(self): + appendedSeries = self.series.append(self.objSeries) + for idx, value in compat.iteritems(appendedSeries): + if idx in self.series.index: + self.assertEqual(value, self.series[idx]) + elif idx in self.objSeries.index: + self.assertEqual(value, self.objSeries[idx]) + else: + self.fail("orphaned index!") + + self.assertRaises(ValueError, self.ts.append, self.ts, + verify_integrity=True) + + def test_append_many(self): + pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]] + + result = pieces[0].append(pieces[1:]) + assert_series_equal(result, self.ts) + class TestDataFrame(tm.TestCase, Generic): _typ = DataFrame @@ -1308,6 +1338,157 @@ def test_describe_raises(self): with tm.assertRaises(NotImplementedError): tm.makePanel().describe() + # GH8295 + # ENH: allow axis argument to append / move append code to generic.py + # test cases for Series are in TestSeries class. 
+ + # test_append functions for Panel + def test_append_on_panel_raises(self): + with tm.assertRaises(NotImplementedError): + tm.makePanel().append(tm.makePanel()) + + # test_append functions for DataFrame + def test_append_series_dict(self): + df = DataFrame(np.random.randn(5, 4), + columns=['foo', 'bar', 'baz', 'qux']) + + series = df.ix[4] + with assertRaisesRegexp(ValueError, 'Indexes have overlapping values'): + df.append(series, verify_integrity=True) + series.name = None + with assertRaisesRegexp(TypeError, 'Can only append a Series if ' + 'ignore_index=True'): + df.append(series, verify_integrity=True) + + result = df.append(series[::-1], ignore_index=True) + expected = df.append(DataFrame({0: series[::-1]}, index=df.columns).T, + ignore_index=True) + assert_frame_equal(result, expected) + + # dict + result = df.append(series.to_dict(), ignore_index=True) + assert_frame_equal(result, expected) + + result = df.append(series[::-1][:3], ignore_index=True) + expected = df.append(DataFrame({0: series[::-1][:3]}).T, + ignore_index=True) + assert_frame_equal(result, expected.ix[:, result.columns]) + + # can append when name set + row = df.ix[4] + row.name = 5 + result = df.append(row) + expected = df.append(df[-1:], ignore_index=True) + assert_frame_equal(result, expected) + + def test_append_list_of_series_dicts(self): + df = DataFrame(np.random.randn(5, 4), + columns=['foo', 'bar', 'baz', 'qux']) + + dicts = [x.to_dict() for idx, x in df.iterrows()] + + result = df.append(dicts, ignore_index=True) + expected = df.append(df, ignore_index=True) + assert_frame_equal(result, expected) + + # different columns + dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4}, + {'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}] + result = df.append(dicts, ignore_index=True) + expected = df.append(DataFrame(dicts), ignore_index=True) + assert_frame_equal(result, expected) + + def test_append_empty_dataframe(self): + + # Empty df append empty df + df1 = DataFrame([]) + df2 = 
DataFrame([]) + result = df1.append(df2) + expected = df1.copy() + assert_frame_equal(result, expected) + + # Non-empty df append empty df + df1 = DataFrame(np.random.randn(5, 2)) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + assert_frame_equal(result, expected) + + # Empty df with columns append empty df + df1 = DataFrame(columns=['bar', 'foo']) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + assert_frame_equal(result, expected) + + # Non-Empty df with columns append empty df + df1 = DataFrame(np.random.randn(5, 2), columns=['bar', 'foo']) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + assert_frame_equal(result, expected) + + def test_append_dtypes(self): + + # GH 5754 + # row appends of different dtypes (so need to do by-item) + # can sometimes infer the correct type + + df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(5)) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + assert_frame_equal(result, expected) + + df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) + df2 = DataFrame({ 'bar' : 'foo' }, index=lrange(1,2)) + result = df1.append(df2) + expected = DataFrame({ 'bar' : [ Timestamp('20130101'), 'foo' ]}) + assert_frame_equal(result, expected) + + df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) + df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2)) + result = df1.append(df2) + expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') }) + assert_frame_equal(result, expected) + + df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) + df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2), dtype=object) + result = df1.append(df2) + expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') }) + assert_frame_equal(result, expected) + + df1 = DataFrame({ 'bar' : np.nan }, index=lrange(1)) + df2 = DataFrame({ 'bar' : Timestamp('20130101') 
}, index=lrange(1,2)) + result = df1.append(df2) + expected = DataFrame({ 'bar' : Series([ np.nan, Timestamp('20130101')] ,dtype='M8[ns]') }) + assert_frame_equal(result, expected) + + df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1)) + df2 = DataFrame({ 'bar' : 1 }, index=lrange(1,2), dtype=object) + result = df1.append(df2) + expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), 1 ]) }) + assert_frame_equal(result, expected) + + def test_append_with_axis_argument_for_dataframe(self): + # GH8295: actual test for `axis` argument for `DataFrame` + df1 = DataFrame(np.random.randn(4, 2), columns=['FIRST', 'SECOND'], index=list('ABCD')) + df2 = DataFrame(np.random.randn(4, 2), columns=['THIRD', 'FORTH'], index=list('ABCD')) + + df1_df2_ax1 = df1.append(df2, axis=1) + self.assertEqual(df1_df2_ax1.shape, + (len(df1.index), len(df1.columns) + len(df2.columns))) + + with tm.assertRaises(ValueError): + df1.append(df1, axis=1, verify_integrity=True) + + df1_df1_ax0 = df1.append(df1, axis=0) + df1_df2_ax0 = df1.append(df2, axis=0) + self.assertEqual(df1_df2_ax0.shape, + (len(df1.index) + len(df2.index), len(df1.columns) + len(df2.columns))) + self.assertEqual(df1_df1_ax0.shape, + (len(df1.index) + len(df1.index), len(df1.columns))) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 0b863f9662e14..c63aa3bba5880 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -69,10 +69,6 @@ def test_copy_index_name_checking(self): com.pprint_thing(self.ts.index.name) self.assertIsNone(self.ts.index.name) - def test_append_preserve_name(self): - result = self.ts[:5].append(self.ts[5:]) - self.assertEqual(result.name, self.ts.name) - def test_dt_namespace_accessor(self): # GH 7207 @@ -2391,25 +2387,6 @@ def test_quantile_multi(self): Timestamp('2000-01-10 19:12:00')], index=[.2, .2])) - def 
test_append(self): - appendedSeries = self.series.append(self.objSeries) - for idx, value in compat.iteritems(appendedSeries): - if idx in self.series.index: - self.assertEqual(value, self.series[idx]) - elif idx in self.objSeries.index: - self.assertEqual(value, self.objSeries[idx]) - else: - self.fail("orphaned index!") - - self.assertRaises(ValueError, self.ts.append, self.ts, - verify_integrity=True) - - def test_append_many(self): - pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]] - - result = pieces[0].append(pieces[1:]) - assert_series_equal(result, self.ts) - def test_all_any(self): ts = tm.makeTimeSeries() bool_series = ts > 0