Skip to content

Subclassed reshape #15564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.9.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ New features
- DataFrame.drop now supports non-unique indexes (:issue:`2101`)
- Panel.shift now supports negative periods (:issue:`2164`)
- DataFrame now support unary ~ operator (:issue:`2110`)
- `stack`, `unstack`, and `pivot` operations now preserve subclass family
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this entry to the 0.20.0 whatsnew.

(:issue:`15563`)

API changes
~~~~~~~~~~~
Expand Down
32 changes: 23 additions & 9 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,11 @@ def pivot(self, index=None, columns=None, values=None):
index = self.index
else:
index = self[index]
indexed = Series(self[values].values,
index=MultiIndex.from_arrays([index, self[columns]]))

indexed = self._constructor_sliced(
self[values].values,
index=MultiIndex.from_arrays([index, self[columns]]))

return indexed.unstack(columns)


Expand Down Expand Up @@ -410,13 +413,24 @@ def unstack(obj, level, fill_value=None):

if isinstance(obj, DataFrame):
if isinstance(obj.index, MultiIndex):
return _unstack_frame(obj, level, fill_value=fill_value)
unstacked = _unstack_frame(obj, level, fill_value=fill_value)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think this should be done inside the `.stack` and `_unstack_frame` functions, no?

unstacked = obj.T.stack(dropna=False)

if len(unstacked.shape) == 1:
return obj._constructor_sliced(unstacked)
else:
return obj.T.stack(dropna=False)
return obj._constructor(unstacked)

else:
unstacker = _Unstacker(obj.values, obj.index, level=level,
fill_value=fill_value)
return unstacker.get_result()
unstacked = unstacker.get_result()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: this now has logic in two places — the creation of the unstacked result itself, and then the choice of return class. It should be part of `get_result` (it could certainly be another method that `get_result` calls).

if len(unstacked.shape) == 1:
return obj._constructor(unstacked)
else:
return obj._constructor_expanddim(unstacked)


def _unstack_frame(obj, level, fill_value=None):
Expand Down Expand Up @@ -515,7 +529,7 @@ def factorize(index):
mask = notnull(new_values)
new_values = new_values[mask]
new_index = new_index[mask]
return Series(new_values, index=new_index)
return frame._constructor_sliced(new_values, index=new_index)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this is good



def stack_multiple(frame, level, dropna=True):
Expand Down Expand Up @@ -654,7 +668,7 @@ def _convert_level_number(level_num, columns):
new_index = MultiIndex(levels=new_levels, labels=new_labels,
names=new_names, verify_integrity=False)

result = DataFrame(new_data, index=new_index, columns=new_columns)
result = frame._constructor(new_data, index=new_index, columns=new_columns)

# more efficient way to go about this? can do the whole masking biz but
# will only save a small amount of time...
Expand Down Expand Up @@ -814,7 +828,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
mdata[col] = np.asanyarray(frame.columns
._get_level_values(i)).repeat(N)

return DataFrame(mdata, columns=mcolumns)
return frame._constructor(mdata, columns=mcolumns)


def lreshape(data, groups, dropna=True, label=None):
Expand Down Expand Up @@ -883,7 +897,7 @@ def lreshape(data, groups, dropna=True, label=None):
if not mask.all():
mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata))

return DataFrame(mdata, columns=id_cols + pivot_cols)
return data._constructor(mdata, columns=id_cols + pivot_cols)


def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
Expand Down
140 changes: 139 additions & 1 deletion pandas/tests/frame/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

from pandas import DataFrame, Series, MultiIndex, Panel
from pandas import DataFrame, Series, MultiIndex, Panel, Index
import pandas as pd
import pandas.util.testing as tm

Expand Down Expand Up @@ -125,6 +125,144 @@ def test_indexing_sliced(self):
tm.assert_series_equal(res, exp)
tm.assertIsInstance(res, tm.SubclassedSeries)

def test_subclass_stack(self):
# GH 15563 (issue cited by this PR's whatsnew entry): stack() on a
# SubclassedDataFrame is expected to return a SubclassedSeries.
df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add the issue number as a comment

index=['a', 'b', 'c'],
columns=['X', 'Y', 'Z'])

res = df.stack()
exp = tm.SubclassedSeries(
[1, 2, 3, 4, 5, 6, 7, 8, 9],
index=[list('aaabbbccc'), list('XYZXYZXYZ')])

tm.assert_series_equal(res, exp)
tm.assertIsInstance(res, tm.SubclassedSeries)

def test_subclass_stack_multi(self):
    # GH 15563: stacking one level of MultiIndex columns on a subclassed
    # frame must give back the same DataFrame subclass.
    row_index = MultiIndex.from_tuples(
        list(zip('AABB', 'cdcd')), names=['aaa', 'ccc'])
    col_index = MultiIndex.from_tuples(
        list(zip('WWXX', 'yzyz')), names=['www', 'yyy'])
    frame = tm.SubclassedDataFrame(
        [[10, 11, 12, 13],
         [20, 21, 22, 23],
         [30, 31, 32, 33],
         [40, 41, 42, 43]],
        index=row_index, columns=col_index)

    # stack() with no argument and stack('yyy') are checked against the
    # same expectation.
    expected_yyy = tm.SubclassedDataFrame(
        [[10, 12], [11, 13],
         [20, 22], [21, 23],
         [30, 32], [31, 33],
         [40, 42], [41, 43]],
        index=MultiIndex.from_tuples(
            list(zip('AAAABBBB', 'ccddccdd', 'yzyzyzyz')),
            names=['aaa', 'ccc', 'yyy']),
        columns=Index(['W', 'X'], name='www'))

    for stack_args in ((), ('yyy',)):
        result = frame.stack(*stack_args)
        tm.assert_frame_equal(result, expected_yyy)
        tm.assertIsInstance(result, tm.SubclassedDataFrame)

    # Stacking the outer column level instead.
    expected_www = tm.SubclassedDataFrame(
        [[10, 11], [12, 13],
         [20, 21], [22, 23],
         [30, 31], [32, 33],
         [40, 41], [42, 43]],
        index=MultiIndex.from_tuples(
            list(zip('AAAABBBB', 'ccddccdd', 'WXWXWXWX')),
            names=['aaa', 'ccc', 'www']),
        columns=Index(['y', 'z'], name='yyy'))

    result = frame.stack('www')
    tm.assert_frame_equal(result, expected_www)
    tm.assertIsInstance(result, tm.SubclassedDataFrame)

def test_subclass_unstack(self):
    # GH 15563: unstack() on a subclassed frame with a flat index is
    # expected to return the subclass's Series type.
    frame = tm.SubclassedDataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        index=['a', 'b', 'c'],
        columns=['X', 'Y', 'Z'])

    outer_labels = list('XXXYYYZZZ')
    inner_labels = list('abcabcabc')
    expected = tm.SubclassedSeries(
        [1, 4, 7, 2, 5, 8, 3, 6, 9],
        index=[outer_labels, inner_labels])

    result = frame.unstack()

    tm.assert_series_equal(result, expected)
    tm.assertIsInstance(result, tm.SubclassedSeries)

def test_subclass_unstack_multi(self):
    # GH 15563: unstacking one level of a MultiIndex on a subclassed
    # frame must give back the same DataFrame subclass.
    row_index = MultiIndex.from_tuples(
        list(zip('AABB', 'cdcd')), names=['aaa', 'ccc'])
    col_index = MultiIndex.from_tuples(
        list(zip('WWXX', 'yzyz')), names=['www', 'yyy'])
    frame = tm.SubclassedDataFrame(
        [[10, 11, 12, 13],
         [20, 21, 22, 23],
         [30, 31, 32, 33],
         [40, 41, 42, 43]],
        index=row_index, columns=col_index)

    # unstack() with no argument and unstack('ccc') are checked against
    # the same expectation.
    expected_ccc = tm.SubclassedDataFrame(
        [[10, 20, 11, 21, 12, 22, 13, 23],
         [30, 40, 31, 41, 32, 42, 33, 43]],
        index=Index(['A', 'B'], name='aaa'),
        columns=MultiIndex.from_tuples(
            list(zip('WWWWXXXX', 'yyzzyyzz', 'cdcdcdcd')),
            names=['www', 'yyy', 'ccc']))

    for unstack_args in ((), ('ccc',)):
        result = frame.unstack(*unstack_args)
        tm.assert_frame_equal(result, expected_ccc)
        tm.assertIsInstance(result, tm.SubclassedDataFrame)

    # Unstacking the outer index level instead.
    expected_aaa = tm.SubclassedDataFrame(
        [[10, 30, 11, 31, 12, 32, 13, 33],
         [20, 40, 21, 41, 22, 42, 23, 43]],
        index=Index(['c', 'd'], name='ccc'),
        columns=MultiIndex.from_tuples(
            list(zip('WWWWXXXX', 'yyzzyyzz', 'ABABABAB')),
            names=['www', 'yyy', 'aaa']))

    result = frame.unstack('aaa')
    tm.assert_frame_equal(result, expected_aaa)
    tm.assertIsInstance(result, tm.SubclassedDataFrame)

def test_subclass_pivot(self):
    # GH 15563: pivot() on a subclassed frame is expected to return the
    # same DataFrame subclass.
    long_frame = tm.SubclassedDataFrame({
        'index': ['A', 'B', 'C', 'C', 'B', 'A'],
        'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
        'values': [1., 2., 3., 3., 2., 1.]})

    expected = tm.SubclassedDataFrame({
        'One': {'A': 1., 'B': 2., 'C': 3.},
        'Two': {'A': 1., 'B': 2., 'C': 3.}})
    # The expected frame carries the source column names as axis names.
    expected.index.name = 'index'
    expected.columns.name = 'columns'

    pivoted = long_frame.pivot(
        index='index', columns='columns', values='values')

    tm.assert_frame_equal(pivoted, expected)
    tm.assertIsInstance(pivoted, tm.SubclassedDataFrame)

def test_to_panel_expanddim(self):
# GH 9762

Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/series/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ def test_to_frame(self):
tm.assert_frame_equal(res, exp)
tm.assertIsInstance(res, tm.SubclassedDataFrame)

def test_subclass_unstack(self):
    # GH 15563: unstack() on a subclassed Series with a two-level index
    # is expected to return the subclass's DataFrame type.
    outer_labels = list('aabb')
    inner_labels = list('xyxy')
    series = tm.SubclassedSeries(
        [1, 2, 3, 4], index=[outer_labels, inner_labels])

    expected = tm.SubclassedDataFrame(
        {'x': [1, 3], 'y': [2, 4]},
        index=['a', 'b'])

    result = series.unstack()

    tm.assert_frame_equal(result, expected)
    tm.assertIsInstance(result, tm.SubclassedDataFrame)


class TestSparseSeriesSubclassing(tm.TestCase):

Expand Down