Skip to content

BUG: Stack/unstack do not return subclassed objects (GH15563) #18929

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,8 @@ Reshaping
- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`)
- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`)
- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`)
-

Numeric
^^^^^^^
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
mdata[col] = np.asanyarray(frame.columns
._get_level_values(i)).repeat(N)

from pandas import DataFrame
return DataFrame(mdata, columns=mcolumns)
return frame._constructor(mdata, columns=mcolumns)


def lreshape(data, groups, dropna=True, label=None):
Expand Down Expand Up @@ -152,8 +151,7 @@ def lreshape(data, groups, dropna=True, label=None):
if not mask.all():
mdata = {k: v[mask] for k, v in compat.iteritems(mdata)}

from pandas import DataFrame
return DataFrame(mdata, columns=id_cols + pivot_cols)
return data._constructor(mdata, columns=id_cols + pivot_cols)


def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):
Expand Down
47 changes: 35 additions & 12 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,23 @@ class _Unstacker(object):

Parameters
----------
values : ndarray
Values of DataFrame to "Unstack"
index : object
Pandas ``Index``
level : int or str, default last level
Level to "unstack". Accepts a name for the level.
value_columns : Index, optional
Pandas ``Index`` or ``MultiIndex`` object if unstacking a DataFrame
fill_value : scalar, optional
Default value to fill in missing values if subgroups do not have the
same set of labels. By default, missing values will be replaced with
the default fill value for that data type, NaN for float, NaT for
datetimelike, etc. For integer types, by default data will be converted to
float and missing values will be set to NaN.
constructor : object
Pandas ``DataFrame`` or subclass used to create unstacked
response. If None, DataFrame or SparseDataFrame will be used.

Examples
--------
Expand Down Expand Up @@ -69,7 +84,7 @@ class _Unstacker(object):
"""

def __init__(self, values, index, level=-1, value_columns=None,
fill_value=None):
fill_value=None, constructor=None):

self.is_categorical = None
self.is_sparse = is_sparse(values)
Expand All @@ -86,6 +101,14 @@ def __init__(self, values, index, level=-1, value_columns=None,
self.value_columns = value_columns
self.fill_value = fill_value

if constructor is None:
if self.is_sparse:
self.constructor = SparseDataFrame
else:
self.constructor = DataFrame
else:
self.constructor = constructor

if value_columns is None and values.shape[1] != 1: # pragma: no cover
raise ValueError('must pass column labels for multi-column data')

Expand Down Expand Up @@ -173,8 +196,7 @@ def get_result(self):
ordered=ordered)
for i in range(values.shape[-1])]

klass = SparseDataFrame if self.is_sparse else DataFrame
return klass(values, index=index, columns=columns)
return self.constructor(values, index=index, columns=columns)

def get_new_values(self):
values = self.values
Expand Down Expand Up @@ -374,8 +396,9 @@ def pivot(self, index=None, columns=None, values=None):
index = self.index
else:
index = self[index]
indexed = Series(self[values].values,
index=MultiIndex.from_arrays([index, self[columns]]))
indexed = self._constructor_sliced(
self[values].values,
index=MultiIndex.from_arrays([index, self[columns]]))
return indexed.unstack(columns)


Expand Down Expand Up @@ -461,7 +484,8 @@ def unstack(obj, level, fill_value=None):
return obj.T.stack(dropna=False)
else:
unstacker = _Unstacker(obj.values, obj.index, level=level,
fill_value=fill_value)
fill_value=fill_value,
constructor=obj._constructor_expanddim)
return unstacker.get_result()


Expand All @@ -470,12 +494,12 @@ def _unstack_frame(obj, level, fill_value=None):
unstacker = partial(_Unstacker, index=obj.index,
level=level, fill_value=fill_value)
blocks = obj._data.unstack(unstacker)
klass = type(obj)
return klass(blocks)
return obj._constructor(blocks)
else:
unstacker = _Unstacker(obj.values, obj.index, level=level,
value_columns=obj.columns,
fill_value=fill_value)
fill_value=fill_value,
constructor=obj._constructor)
return unstacker.get_result()


Expand Down Expand Up @@ -528,8 +552,7 @@ def factorize(index):
new_values = new_values[mask]
new_index = new_index[mask]

klass = type(frame)._constructor_sliced
return klass(new_values, index=new_index)
return frame._constructor_sliced(new_values, index=new_index)


def stack_multiple(frame, level, dropna=True):
Expand Down Expand Up @@ -676,7 +699,7 @@ def _convert_level_number(level_num, columns):
new_index = MultiIndex(levels=new_levels, labels=new_labels,
names=new_names, verify_integrity=False)

result = DataFrame(new_data, index=new_index, columns=new_columns)
result = frame._constructor(new_data, index=new_index, columns=new_columns)

# more efficient way to go about this? can do the whole masking biz but
# will only save a small amount of time...
Expand Down
Loading