From d8d8b7ff49b434ee8c4fedb344407017077a6310 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 01:08:35 -0800 Subject: [PATCH 01/13] preserve subclass with _constructor* properties on stack/unstack --- pandas/core/reshape.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index faad6c500a21f..da216ce898598 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -410,13 +410,24 @@ def unstack(obj, level, fill_value=None): if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): - return _unstack_frame(obj, level, fill_value=fill_value) + unstacked = _unstack_frame(obj, level, fill_value=fill_value) else: - return obj.T.stack(dropna=False) + unstacked = obj.T.stack(dropna=False) + + if len(unstacked.shape) == 1: + return obj._constructor_sliced(unstacked) + else: + return obj._constructor(unstacked) + else: unstacker = _Unstacker(obj.values, obj.index, level=level, fill_value=fill_value) - return unstacker.get_result() + unstacked = unstacker.get_result() + + if len(unstacked.shape) == 1: + return obj._constructor(unstacked) + else: + return obj._constructor_expanddim(unstacked) def _unstack_frame(obj, level, fill_value=None): @@ -515,7 +526,7 @@ def factorize(index): mask = notnull(new_values) new_values = new_values[mask] new_index = new_index[mask] - return Series(new_values, index=new_index) + return frame._constructor_sliced(new_values, index=new_index) def stack_multiple(frame, level, dropna=True): From bab90914fb76a298e9163731b6cf00810a0906ce Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 17:08:29 -0800 Subject: [PATCH 02/13] bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex --- pandas/core/reshape.py | 8 +- pandas/tests/frame/test_subclass.py | 142 +++++++++++++++++++++++++++ pandas/tests/series/test_subclass.py | 11 +++ 3 files changed, 157 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index da216ce898598..9e670688a2b88 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -333,7 +333,7 @@ def pivot(self, index=None, columns=None, values=None): index = self.index else: index = self[index] - indexed = Series(self[values].values, + indexed = self._constructor_sliced(self[values].values, index=MultiIndex.from_arrays([index, self[columns]])) return indexed.unstack(columns) @@ -665,7 +665,7 @@ def _convert_level_number(level_num, columns): new_index = MultiIndex(levels=new_levels, labels=new_labels, names=new_names, verify_integrity=False) - result = DataFrame(new_data, index=new_index, columns=new_columns) + result = frame._constructor(new_data, index=new_index, columns=new_columns) # more efficient way to go about this? can do the whole masking biz but # will only save a small amount of time... @@ -825,7 +825,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, mdata[col] = np.asanyarray(frame.columns ._get_level_values(i)).repeat(N) - return DataFrame(mdata, columns=mcolumns) + return frame._constructor(mdata, columns=mcolumns) def lreshape(data, groups, dropna=True, label=None): @@ -894,7 +894,7 @@ def lreshape(data, groups, dropna=True, label=None): if not mask.all(): mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) - return DataFrame(mdata, columns=id_cols + pivot_cols) + return data._constructor(mdata, columns=id_cols + pivot_cols) def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 9052a16bf973c..21649ce322ae6 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -125,6 +125,148 @@ def test_indexing_sliced(self): tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) + def test_subclass_stack(self): + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], + index=[list('aaabbbccc'), list('XYZXYZXYZ')]) + + tm.assert_series_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedSeries) + + def test_subclass_stack_multi(self): + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 43]], + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz')), + names=['aaa', 'ccc', 'yyy']), + columns=MultiIndex.from_tuples( + zip(list('WX')), names=['www'])) + + res = df.stack() + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + res = df.stack('yyy') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + exp = tm.SubclassedDataFrame([ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43]], + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX')), + names=['aaa', 'ccc', 'www']), + columns=MultiIndex.from_tuples( + zip(list('yz')), names=['yyy'])) + + res = df.stack('www') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + def test_subclass_unstack(self): + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], + index=[list('XXXYYYZZZ'), list('abcabcabc')]) + + tm.assert_series_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedSeries) + + def test_subclass_unstack_multi(self): + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 20, 11, 21, 12, 22, 13, 23], + [30, 40, 31, 41, 32, 42, 33, 43]], + index=MultiIndex.from_tuples( + zip(list('AB')), names=['aaa']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd')), + names=['www', 'yyy', 'ccc'])) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + res = df.unstack('ccc') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + exp = tm.SubclassedDataFrame([ + [10, 30, 11, 31, 12, 32, 13, 33], + [20, 40, 21, 41, 22, 42, 23, 43]], + index=MultiIndex.from_tuples( + zip(list('cd')), names=['ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB')), + names=['www', 'yyy', 'aaa'])) + + res = df.unstack('aaa') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + def test_subclass_pivot(self): + df = tm.SubclassedDataFrame({ + 'index': ['A', 'B', 'C', 'C', 'B', 'A'], + 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], + 'values': [1., 2., 3., 3., 2., 1.]}) + + pivoted = df.pivot( + index='index', columns='columns', values='values') + + expected = tm.SubclassedDataFrame({ + 'One': {'A': 1., 'B': 2., 'C': 3.}, + 'Two': {'A': 1., 'B': 2., 'C': 3.}}) + + expected.index.name, expected.columns.name = 'index', 'columns' + + tm.assert_frame_equal(pivoted, expected) + tm.assertIsInstance(pivoted, tm.SubclassedDataFrame) + def test_to_panel_expanddim(self): # GH 9762 diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 3b1b8aca426e1..1ee855545c6b7 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -32,6 +32,17 @@ def test_to_frame(self): tm.assert_frame_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedDataFrame) + def test_subclass_unstack(self): + s = tm.SubclassedSeries( + [1, 2, 3, 4], index=[list('aabb'), list('xyxy')]) + + res = s.unstack() + exp = tm.SubclassedDataFrame( + {'x': [1, 3], 'y': [2, 4]}, index=['a', 'b']) + + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + class TestSparseSeriesSubclassing(tm.TestCase): From 7c05bf124fca9f2076955023faf7c5142c379c81 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 17:16:38 -0800 Subject: [PATCH 03/13] flake8 --- pandas/core/reshape.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 9e670688a2b88..6e37d7b3556a7 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -333,8 +333,11 @@ def pivot(self, index=None, columns=None, values=None): index = self.index else: index = self[index] - indexed = self._constructor_sliced(self[values].values, - index=MultiIndex.from_arrays([index, self[columns]])) + + indexed = self._constructor_sliced( + self[values].values, + index=MultiIndex.from_arrays([index, self[columns]])) + return indexed.unstack(columns) From e930200c7954644232b493760e3b89b12834f5b4 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 01:08:35 -0800 Subject: [PATCH 04/13] preserve subclass with _constructor* properties on stack/unstack --- pandas/core/reshape.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index faad6c500a21f..da216ce898598 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -410,13 +410,24 @@ def unstack(obj, level, fill_value=None): if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): - return _unstack_frame(obj, level, fill_value=fill_value) + unstacked = _unstack_frame(obj, level, fill_value=fill_value) else: - return obj.T.stack(dropna=False) + unstacked = obj.T.stack(dropna=False) + + if len(unstacked.shape) == 1: + return obj._constructor_sliced(unstacked) + else: + return obj._constructor(unstacked) + else: unstacker = _Unstacker(obj.values, obj.index, level=level, fill_value=fill_value) - return unstacker.get_result() + unstacked = unstacker.get_result() + + if len(unstacked.shape) == 1: + return obj._constructor(unstacked) + else: + return obj._constructor_expanddim(unstacked) def _unstack_frame(obj, level, fill_value=None): @@ -515,7 +526,7 @@ def factorize(index): mask = notnull(new_values) new_values = new_values[mask] new_index = new_index[mask] - return Series(new_values, index=new_index) + return frame._constructor_sliced(new_values, index=new_index) def stack_multiple(frame, level, dropna=True): From ff7ac1ab8060684e090d8776c8a82fc8a805f246 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 17:08:29 -0800 Subject: [PATCH 05/13] bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex --- pandas/core/reshape.py | 8 +- pandas/tests/frame/test_subclass.py | 142 +++++++++++++++++++++++++++ pandas/tests/series/test_subclass.py | 11 +++ 3 files changed, 157 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index da216ce898598..9e670688a2b88 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -333,7 +333,7 @@ def pivot(self, index=None, columns=None, values=None): index = self.index else: index = self[index] - indexed = Series(self[values].values, + indexed = self._constructor_sliced(self[values].values, index=MultiIndex.from_arrays([index, self[columns]])) return indexed.unstack(columns) @@ -665,7 +665,7 @@ def _convert_level_number(level_num, columns): new_index = MultiIndex(levels=new_levels, labels=new_labels, names=new_names, verify_integrity=False) - result = DataFrame(new_data, index=new_index, columns=new_columns) + result = frame._constructor(new_data, index=new_index, columns=new_columns) # more efficient way to go about this? can do the whole masking biz but # will only save a small amount of time... @@ -825,7 +825,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, mdata[col] = np.asanyarray(frame.columns ._get_level_values(i)).repeat(N) - return DataFrame(mdata, columns=mcolumns) + return frame._constructor(mdata, columns=mcolumns) def lreshape(data, groups, dropna=True, label=None): @@ -894,7 +894,7 @@ def lreshape(data, groups, dropna=True, label=None): if not mask.all(): mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) - return DataFrame(mdata, columns=id_cols + pivot_cols) + return data._constructor(mdata, columns=id_cols + pivot_cols) def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 9052a16bf973c..21649ce322ae6 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -125,6 +125,148 @@ def test_indexing_sliced(self): tm.assert_series_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedSeries) + def test_subclass_stack(self): + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], + index=[list('aaabbbccc'), list('XYZXYZXYZ')]) + + tm.assert_series_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedSeries) + + def test_subclass_stack_multi(self): + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 43]], + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz')), + names=['aaa', 'ccc', 'yyy']), + columns=MultiIndex.from_tuples( + zip(list('WX')), names=['www'])) + + res = df.stack() + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + res = df.stack('yyy') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + exp = tm.SubclassedDataFrame([ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43]], + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX')), + names=['aaa', 'ccc', 'www']), + columns=MultiIndex.from_tuples( + zip(list('yz')), names=['yyy'])) + + res = df.stack('www') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + def test_subclass_unstack(self): + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], + index=[list('XXXYYYZZZ'), list('abcabcabc')]) + + tm.assert_series_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedSeries) + + def test_subclass_unstack_multi(self): + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 20, 11, 21, 12, 22, 13, 23], + [30, 40, 31, 41, 32, 42, 33, 43]], + index=MultiIndex.from_tuples( + zip(list('AB')), names=['aaa']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd')), + names=['www', 'yyy', 'ccc'])) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + res = df.unstack('ccc') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + exp = tm.SubclassedDataFrame([ + [10, 30, 11, 31, 12, 32, 13, 33], + [20, 40, 21, 41, 22, 42, 23, 43]], + index=MultiIndex.from_tuples( + zip(list('cd')), names=['ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB')), + names=['www', 'yyy', 'aaa'])) + + res = df.unstack('aaa') + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + + def test_subclass_pivot(self): + df = tm.SubclassedDataFrame({ + 'index': ['A', 'B', 'C', 'C', 'B', 'A'], + 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], + 'values': [1., 2., 3., 3., 2., 1.]}) + + pivoted = df.pivot( + index='index', columns='columns', values='values') + + expected = tm.SubclassedDataFrame({ + 'One': {'A': 1., 'B': 2., 'C': 3.}, + 'Two': {'A': 1., 'B': 2., 'C': 3.}}) + + expected.index.name, expected.columns.name = 'index', 'columns' + + tm.assert_frame_equal(pivoted, expected) + tm.assertIsInstance(pivoted, tm.SubclassedDataFrame) + def test_to_panel_expanddim(self): # GH 9762 diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 3b1b8aca426e1..1ee855545c6b7 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -32,6 +32,17 @@ def test_to_frame(self): tm.assert_frame_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedDataFrame) + def test_subclass_unstack(self): + s = tm.SubclassedSeries( + [1, 2, 3, 4], index=[list('aabb'), list('xyxy')]) + + res = s.unstack() + exp = tm.SubclassedDataFrame( + {'x': [1, 3], 'y': [2, 4]}, index=['a', 'b']) + + tm.assert_frame_equal(res, exp) + tm.assertIsInstance(res, tm.SubclassedDataFrame) + class TestSparseSeriesSubclassing(tm.TestCase): From 3cba84adceb5d864f9474c9e1edd8893ebf5f67b Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 17:16:38 -0800 Subject: [PATCH 06/13] flake8 --- pandas/core/reshape.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 9e670688a2b88..6e37d7b3556a7 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -333,8 +333,11 @@ def pivot(self, index=None, columns=None, values=None): index = self.index else: index = self[index] - indexed = self._constructor_sliced(self[values].values, - index=MultiIndex.from_arrays([index, self[columns]])) + + indexed = self._constructor_sliced( + self[values].values, + index=MultiIndex.from_arrays([index, self[columns]])) + return indexed.unstack(columns) From 68f6d159633f55aa26ad1145529be6a7bcf34c74 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 17:47:34 -0800 Subject: [PATCH 07/13] update whatsnew --- doc/source/whatsnew/v0.9.1.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.9.1.txt b/doc/source/whatsnew/v0.9.1.txt index 9dd29a5fe7bf7..6d1848d708cdb 100644 --- a/doc/source/whatsnew/v0.9.1.txt +++ b/doc/source/whatsnew/v0.9.1.txt @@ -105,6 +105,8 @@ New features - DataFrame.drop now supports non-unique indexes (:issue:`2101`) - Panel.shift now supports negative periods (:issue:`2164`) - DataFrame now support unary ~ operator (:issue:`2110`) + - `stack`, `unstack`, and `pivot` operations now preserve subclass family + (:issue:`15563`) API changes ~~~~~~~~~~~ From 4c8b58d907f44bc16a45916b481377e8dbb95f49 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 18:03:32 -0800 Subject: [PATCH 08/13] fix python 3.3 compat issue (`list(zip)`) --- pandas/tests/frame/test_subclass.py | 36 +++++++++++++---------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 21649ce322ae6..88e7ded9c5d3a 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -4,7 +4,7 @@ import numpy as np -from pandas import DataFrame, Series, MultiIndex, Panel +from pandas import DataFrame, Series, MultiIndex, Panel, Index import pandas as pd import pandas.util.testing as tm @@ -145,10 +145,10 @@ def test_subclass_stack_multi(self): [30, 31, 32, 33], [40, 41, 42, 43]], index=MultiIndex.from_tuples( - zip(list('AABB'), list('cdcd')), + list(zip(list('AABB'), list('cdcd'))), names=['aaa', 'ccc']), columns=MultiIndex.from_tuples( - zip(list('WWXX'), list('yzyz')), + list(zip(list('WWXX'), list('yzyz'))), names=['www', 'yyy'])) exp = tm.SubclassedDataFrame([ @@ -161,10 +161,9 @@ def test_subclass_stack_multi(self): [40, 42], [41, 43]], index=MultiIndex.from_tuples( - zip(list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz')), + list(zip(list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), names=['aaa', 'ccc', 'yyy']), - columns=MultiIndex.from_tuples( - zip(list('WX')), names=['www'])) + columns=Index(['W', 'X'], name='www')) res = df.stack() tm.assert_frame_equal(res, exp) @@ -183,11 +182,10 @@ def test_subclass_stack_multi(self): [32, 33], [40, 41], [42, 43]], - index=MultiIndex.from_tuples( - zip(list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX')), + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), names=['aaa', 'ccc', 'www']), - columns=MultiIndex.from_tuples( - zip(list('yz')), names=['yyy'])) + columns=Index(['y', 'z'], name='yyy')) res = df.stack('www') tm.assert_frame_equal(res, exp) @@ -213,19 +211,18 @@ def test_subclass_unstack_multi(self): [30, 31, 32, 33], [40, 41, 42, 43]], index=MultiIndex.from_tuples( - zip(list('AABB'), list('cdcd')), + list(zip(list('AABB'), list('cdcd'))), names=['aaa', 'ccc']), columns=MultiIndex.from_tuples( - zip(list('WWXX'), list('yzyz')), + list(zip(list('WWXX'), list('yzyz'))), names=['www', 'yyy'])) exp = tm.SubclassedDataFrame([ [10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]], - index=MultiIndex.from_tuples( - zip(list('AB')), names=['aaa']), - columns=MultiIndex.from_tuples( - zip(list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd')), + index=Index(['A', 'B'], name='aaa'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), names=['www', 'yyy', 'ccc'])) res = df.unstack() @@ -239,10 +236,9 @@ def test_subclass_unstack_multi(self): exp = tm.SubclassedDataFrame([ [10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]], - index=MultiIndex.from_tuples( - zip(list('cd')), names=['ccc']), - columns=MultiIndex.from_tuples( - zip(list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB')), + index=Index(['c', 'd'], name='ccc'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), names=['www', 'yyy', 'aaa'])) res = df.unstack('aaa') From bfd146da6519154fd2ff9811ab65b47d333a59ee Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Fri, 3 Mar 2017 18:07:09 -0800 Subject: [PATCH 09/13] flake8 --- pandas/tests/frame/test_subclass.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 88e7ded9c5d3a..cb39383486880 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -160,8 +160,8 @@ def test_subclass_stack_multi(self): [31, 33], [40, 42], [41, 43]], - index=MultiIndex.from_tuples( - list(zip(list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), names=['aaa', 'ccc', 'yyy']), columns=Index(['W', 'X'], name='www')) @@ -183,7 +183,7 @@ def test_subclass_stack_multi(self): [40, 41], [42, 43]], index=MultiIndex.from_tuples(list(zip( - list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), + list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), names=['aaa', 'ccc', 'www']), columns=Index(['y', 'z'], name='yyy')) @@ -222,8 +222,8 @@ def test_subclass_unstack_multi(self): [30, 40, 31, 41, 32, 42, 33, 43]], index=Index(['A', 'B'], name='aaa'), columns=MultiIndex.from_tuples(list(zip( - list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), - names=['www', 'yyy', 'ccc'])) + list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), + names=['www', 'yyy', 'ccc'])) res = df.unstack() tm.assert_frame_equal(res, exp) @@ -238,7 +238,7 @@ def test_subclass_unstack_multi(self): [20, 40, 21, 41, 22, 42, 23, 43]], index=Index(['c', 'd'], name='ccc'), columns=MultiIndex.from_tuples(list(zip( - list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), + list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), names=['www', 'yyy', 'aaa'])) res = df.unstack('aaa') From f0be4c800538754b244f5dd3b3c6c664c20ddd3e Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Mon, 6 Mar 2017 23:40:02 -0800 Subject: [PATCH 10/13] update docs/source/internals.rst for subclassed reshape and minor bug fixes --- doc/source/internals.rst | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 3d96b93de4cc9..f114e8f7c5769 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -128,7 +128,7 @@ Property Attributes ``Series`` ``DataFrame`` ``Panel ``_constructor_expanddim`` ``DataFrame`` ``Panel`` ``NotImplementedError`` =========================== ======================= =================== ======================= -Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties. +The below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties: .. code-block:: python @@ -152,6 +152,8 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame def _constructor_sliced(self): return SubclassedSeries +Overriding constructor properties allows subclass families to be preserved across slice and reshape operations: + .. code-block:: python >>> s = SubclassedSeries([1, 2, 3]) @@ -162,7 +164,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame >>> type(to_framed) - >>> df = SubclassedDataFrame({'A', [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) + >>> df = SubclassedDataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) >>> df A B C 0 1 4 7 @@ -190,6 +192,21 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame >>> type(sliced2) + >>> stacked = df.stack() + >>> stacked + 0 A 1 + B 4 + C 7 + 1 A 2 + B 5 + C 8 + 2 A 3 + B 6 + C 9 + dtype: int64 + >>> type(stacked) + + Define Original Properties ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 1c2535dbf208af29c01df8a2bb4e7c11037baab3 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Mon, 6 Mar 2017 23:45:54 -0800 Subject: [PATCH 11/13] move whatsnew entry from 0.9.1 --> 0.20.0 --- doc/source/whatsnew/v0.20.0.txt | 2 +- doc/source/whatsnew/v0.9.1.txt | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 782ae6082c1cf..adf81bfdd6138 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -192,8 +192,8 @@ Other enhancements - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations +- `stack`, `unstack`, and `pivot` operations now preserve subclass family (:issue:`15563`) .. _whatsnew_0200.api_breaking: diff --git a/doc/source/whatsnew/v0.9.1.txt b/doc/source/whatsnew/v0.9.1.txt index 6d1848d708cdb..9dd29a5fe7bf7 100644 --- a/doc/source/whatsnew/v0.9.1.txt +++ b/doc/source/whatsnew/v0.9.1.txt @@ -105,8 +105,6 @@ New features - DataFrame.drop now supports non-unique indexes (:issue:`2101`) - Panel.shift now supports negative periods (:issue:`2164`) - DataFrame now support unary ~ operator (:issue:`2110`) - - `stack`, `unstack`, and `pivot` operations now preserve subclass family - (:issue:`15563`) API changes ~~~~~~~~~~~ From 5826158a0cb43db5af39802523e83424ce3bf57a Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Tue, 7 Mar 2017 00:01:16 -0800 Subject: [PATCH 12/13] add issue number comments to tests --- pandas/tests/frame/test_subclass.py | 5 +++++ pandas/tests/series/test_subclass.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index cb39383486880..72841cad83141 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -126,6 +126,7 @@ def test_indexing_sliced(self): tm.assertIsInstance(res, tm.SubclassedSeries) def test_subclass_stack(self): + # GH 15564 df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'b', 'c'], columns=['X', 'Y', 'Z']) @@ -139,6 +140,7 @@ def test_subclass_stack(self): tm.assertIsInstance(res, tm.SubclassedSeries) def test_subclass_stack_multi(self): + # GH 15564 df = tm.SubclassedDataFrame([ [10, 11, 12, 13], [20, 21, 22, 23], @@ -192,6 +194,7 @@ def test_subclass_stack_multi(self): tm.assertIsInstance(res, tm.SubclassedDataFrame) def test_subclass_unstack(self): + # GH 15564 df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'b', 'c'], columns=['X', 'Y', 'Z']) @@ -205,6 +208,7 @@ def test_subclass_unstack(self): tm.assertIsInstance(res, tm.SubclassedSeries) def test_subclass_unstack_multi(self): + # GH 15564 df = tm.SubclassedDataFrame([ [10, 11, 12, 13], [20, 21, 22, 23], @@ -246,6 +250,7 @@ def test_subclass_unstack_multi(self): tm.assertIsInstance(res, tm.SubclassedDataFrame) def test_subclass_pivot(self): + # GH 15564 df = tm.SubclassedDataFrame({ 'index': ['A', 'B', 'C', 'C', 'B', 'A'], 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 1ee855545c6b7..5ba38f45ba093 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -33,6 +33,7 @@ def test_to_frame(self): tm.assertIsInstance(res, tm.SubclassedDataFrame) def test_subclass_unstack(self): + # GH 15564 s = tm.SubclassedSeries( [1, 2, 3, 4], index=[list('aabb'), list('xyxy')]) @@ -87,6 +88,7 @@ def test_subclass_sparse_addition(self): tm.assert_sp_series_equal(s1 + s2, exp) def test_subclass_sparse_to_frame(self): + # GH 15564 s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx') res = s.to_frame() From 523d767bfea893757ddcbc7a8d4d7fd41a416f14 Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Tue, 7 Mar 2017 00:17:41 -0800 Subject: [PATCH 13/13] merge conflicts with upstream/master --- pandas/tests/frame/test_subclass.py | 60 +++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 72841cad83141..d15c11a3b740f 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -126,7 +126,10 @@ def test_indexing_sliced(self): tm.assertIsInstance(res, tm.SubclassedSeries) def test_subclass_stack(self): +<<<<<<< HEAD # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'b', 'c'], columns=['X', 'Y', 'Z']) @@ -140,17 +143,27 @@ def test_subclass_stack(self): tm.assertIsInstance(res, tm.SubclassedSeries) def test_subclass_stack_multi(self): +<<<<<<< HEAD # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex df = tm.SubclassedDataFrame([ [10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], index=MultiIndex.from_tuples( +<<<<<<< HEAD list(zip(list('AABB'), list('cdcd'))), names=['aaa', 'ccc']), columns=MultiIndex.from_tuples( list(zip(list('WWXX'), list('yzyz'))), +======= + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex names=['www', 'yyy'])) exp = tm.SubclassedDataFrame([ @@ -162,10 +175,18 @@ def test_subclass_stack_multi(self): [31, 33], [40, 42], [41, 43]], +<<<<<<< HEAD index=MultiIndex.from_tuples(list(zip( list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), names=['aaa', 'ccc', 'yyy']), columns=Index(['W', 'X'], name='www')) +======= + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz')), + names=['aaa', 'ccc', 'yyy']), + columns=MultiIndex.from_tuples( + zip(list('WX')), names=['www'])) +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex res = df.stack() tm.assert_frame_equal(res, exp) @@ -184,17 +205,28 @@ def test_subclass_stack_multi(self): [32, 33], [40, 41], [42, 43]], +<<<<<<< HEAD index=MultiIndex.from_tuples(list(zip( list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), names=['aaa', 'ccc', 'www']), columns=Index(['y', 'z'], name='yyy')) +======= + index=MultiIndex.from_tuples( + zip(list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX')), + names=['aaa', 'ccc', 'www']), + columns=MultiIndex.from_tuples( + zip(list('yz')), names=['yyy'])) +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex res = df.stack('www') tm.assert_frame_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedDataFrame) def test_subclass_unstack(self): +<<<<<<< HEAD # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'b', 'c'], columns=['X', 'Y', 'Z']) @@ -208,26 +240,44 @@ def test_subclass_unstack(self): tm.assertIsInstance(res, tm.SubclassedSeries) def test_subclass_unstack_multi(self): +<<<<<<< HEAD # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex df = tm.SubclassedDataFrame([ [10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], index=MultiIndex.from_tuples( +<<<<<<< HEAD list(zip(list('AABB'), list('cdcd'))), names=['aaa', 'ccc']), columns=MultiIndex.from_tuples( list(zip(list('WWXX'), list('yzyz'))), +======= + zip(list('AABB'), list('cdcd')), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWXX'), list('yzyz')), +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex names=['www', 'yyy'])) exp = tm.SubclassedDataFrame([ [10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]], +<<<<<<< HEAD index=Index(['A', 'B'], name='aaa'), columns=MultiIndex.from_tuples(list(zip( list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), names=['www', 'yyy', 'ccc'])) +======= + index=MultiIndex.from_tuples( + zip(list('AB')), names=['aaa']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd')), + names=['www', 'yyy', 'ccc'])) +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex res = df.unstack() tm.assert_frame_equal(res, exp) @@ -240,9 +290,16 @@ def test_subclass_unstack_multi(self): exp = tm.SubclassedDataFrame([ [10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]], +<<<<<<< HEAD index=Index(['c', 'd'], name='ccc'), columns=MultiIndex.from_tuples(list(zip( list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), +======= + index=MultiIndex.from_tuples( + zip(list('cd')), names=['ccc']), + columns=MultiIndex.from_tuples( + zip(list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB')), +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex names=['www', 'yyy', 'aaa'])) res = df.unstack('aaa') @@ -250,7 +307,10 @@ def test_subclass_unstack_multi(self): tm.assertIsInstance(res, tm.SubclassedDataFrame) def test_subclass_pivot(self): +<<<<<<< HEAD # GH 15564 +======= +>>>>>>> bug fix; test stack, unstack, pivot for series and df with Index, MultiIndex df = tm.SubclassedDataFrame({ 'index': ['A', 'B', 'C', 'C', 'B', 'A'], 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],