Skip to content

BUG: Slicing subclasses of SparseDataFrames. #13787

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ API changes
- ``pd.read_json`` and ``DataFrame.to_json`` have gained support for reading and writing line-delimited JSON with the ``lines`` option; see :ref:`Line delimited json <io.jsonl>` (:issue:`9180`)
- ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`)
- ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`)
- Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing. (:issue:`13787`)


.. _whatsnew_0190.api.tolist:
Expand Down
8 changes: 8 additions & 0 deletions pandas/io/tests/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ def compare(self, vf, version):
comparator(result, expected, typ, version)
return data

def compare_sp_series_ts(self, res, exp, typ, version):
    """Compare an unpickled sparse time series with the expected series.

    Parameters
    ----------
    res : SparseSeries
        Object loaded from the pickle.
    exp : SparseSeries
        Object it is expected to equal.
    typ : object
        Unused here; accepted so the signature matches the other
        ``compare_*`` comparators.
    version : str or None
        Version string of the pickle being compared; when falsy the
        strict comparison is used.
    """
    # SparseTimeSeries integrated into SparseSeries in 0.12.0
    # and deprecated in 0.17.0, so pickles from 0.12.0 or earlier are
    # compared without requiring identical series classes.
    legacy_pickle = bool(version) and LooseVersion(version) <= "0.12.0"
    tm.assert_sp_series_equal(res, exp,
                              check_series_type=not legacy_pickle)

def compare_series_ts(self, result, expected, typ, version):
# GH 7748
tm.assert_series_equal(result, expected)
Expand Down
23 changes: 13 additions & 10 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def _init_matrix(self, data, index, columns, dtype=None):
return self._init_dict(data, index, columns, dtype)

def __array_wrap__(self, result):
return SparseDataFrame(
return self._constructor(
result, index=self.index, columns=self.columns,
default_kind=self._default_kind,
default_fill_value=self._default_fill_value).__finalize__(self)
Expand Down Expand Up @@ -407,7 +407,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
raise NotImplementedError("'level' argument is not supported")

if self.empty and other.empty:
return SparseDataFrame(index=new_index).__finalize__(self)
return self._constructor(index=new_index).__finalize__(self)

new_data = {}
new_fill_value = None
Expand Down Expand Up @@ -519,7 +519,8 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
return self

if len(self.index) == 0:
return SparseDataFrame(index=index, columns=self.columns)
return self._constructor(
index=index, columns=self.columns).__finalize__(self)

indexer = self.index.get_indexer(index, method, limit=limit)
indexer = _ensure_platform_int(indexer)
Expand All @@ -540,8 +541,9 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,

new_series[col] = new

return SparseDataFrame(new_series, index=index, columns=self.columns,
default_fill_value=self._default_fill_value)
return self._constructor(
new_series, index=index, columns=self.columns,
default_fill_value=self._default_fill_value).__finalize__(self)

def _reindex_columns(self, columns, copy, level, fill_value, limit=None,
takeable=False):
Expand All @@ -556,8 +558,9 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None,

# TODO: fill value handling
sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns)
return SparseDataFrame(sdict, index=self.index, columns=columns,
default_fill_value=self._default_fill_value)
return self._constructor(
sdict, index=self.index, columns=columns,
default_fill_value=self._default_fill_value).__finalize__(self)

def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
limit=None, copy=False, allow_dups=False):
Expand Down Expand Up @@ -586,8 +589,8 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
else:
new_arrays[col] = self[col]

return SparseDataFrame(new_arrays, index=index,
columns=columns).__finalize__(self)
return self._constructor(new_arrays, index=index,
columns=columns).__finalize__(self)

def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
sort=False):
Expand Down Expand Up @@ -644,7 +647,7 @@ def transpose(self, *args, **kwargs):
Returns a DataFrame with the rows/columns switched.
"""
nv.validate_transpose(args, kwargs)
return SparseDataFrame(
return self._constructor(
self.values.T, index=self.columns, columns=self.index,
default_fill_value=self._default_fill_value,
default_kind=self._default_kind).__finalize__(self)
Expand Down
12 changes: 6 additions & 6 deletions pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ def wrapper(self, other):
new_fill_value = op(np.float64(self.fill_value),
np.float64(other))

return SparseSeries(op(self.sp_values, other),
index=self.index,
sparse_index=self.sp_index,
fill_value=new_fill_value,
name=self.name)
return self._constructor(op(self.sp_values, other),
index=self.index,
sparse_index=self.sp_index,
fill_value=new_fill_value,
name=self.name)
else: # pragma: no cover
raise TypeError('operation with %s not supported' % type(other))

Expand All @@ -85,7 +85,7 @@ def _sparse_series_op(left, right, op, name):
new_name = _maybe_match_name(left, right)

result = _sparse_array_op(left, right, op, name)
return SparseSeries(result, index=new_index, name=new_name)
return left._constructor(result, index=new_index, name=new_name)


class SparseSeries(Series):
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/frame/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,33 @@ def test_subclass_align_combinations(self):
tm.assert_series_equal(res1, exp2)
tm.assertIsInstance(res2, tm.SubclassedDataFrame)
tm.assert_frame_equal(res2, exp1)

def test_subclass_sparse_slice(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have lots of changes, and we want to see if we can have a test for each. Can you audit these changes and add tests if needed?

rows = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
ssdf = tm.SubclassedSparseDataFrame(rows)
ssdf.testattr = "testattr"

tm.assert_sp_frame_equal(ssdf.loc[:2],
tm.SubclassedSparseDataFrame(rows[:3]))
tm.assert_sp_frame_equal(ssdf.iloc[:2],
tm.SubclassedSparseDataFrame(rows[:2]))
tm.assert_sp_frame_equal(ssdf[:2],
tm.SubclassedSparseDataFrame(rows[:2]))
tm.assert_equal(ssdf.loc[:2].testattr, "testattr")
tm.assert_equal(ssdf.iloc[:2].testattr, "testattr")
tm.assert_equal(ssdf[:2].testattr, "testattr")

tm.assert_sp_series_equal(ssdf.loc[1],
tm.SubclassedSparseSeries(rows[1]),
check_names=False)
tm.assert_sp_series_equal(ssdf.iloc[1],
tm.SubclassedSparseSeries(rows[1]),
check_names=False)

def test_subclass_sparse_transpose(self):
    """Transposing a subclassed sparse frame yields the expected frame."""
    original = tm.SubclassedSparseDataFrame([[1, 2, 3], [4, 5, 6]])
    expected = tm.SubclassedSparseDataFrame([[1, 4], [2, 5], [3, 6]])

    transposed = original.T

    # assert_sp_frame_equal is called with its defaults, so the class of
    # the transposed frame is compared as well as its contents.
    tm.assert_sp_frame_equal(transposed, expected)
24 changes: 24 additions & 0 deletions pandas/tests/series/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,27 @@ def test_to_frame(self):
exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd'))
tm.assert_frame_equal(res, exp)
tm.assertIsInstance(res, tm.SubclassedDataFrame)

def test_subclass_sparse_slice(self):
    """Slicing a subclassed sparse series via .loc, .iloc and []."""
    sparse = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])

    # .loc[1:3] is expected to keep labels 1, 2 and 3.
    by_label = sparse.loc[1:3]
    expected_by_label = tm.SubclassedSparseSeries([2.0, 3.0, 4.0],
                                                  index=[1, 2, 3])
    tm.assert_sp_series_equal(by_label, expected_by_label)

    # .iloc[1:3] is expected to keep only the entries labelled 1 and 2.
    by_position = sparse.iloc[1:3]
    expected_by_position = tm.SubclassedSparseSeries([2.0, 3.0],
                                                     index=[1, 2])
    tm.assert_sp_series_equal(by_position, expected_by_position)

    # Plain [] slicing is expected to give the same result as .iloc.
    by_getitem = sparse[1:3]
    expected_by_getitem = tm.SubclassedSparseSeries([2.0, 3.0],
                                                    index=[1, 2])
    tm.assert_sp_series_equal(by_getitem, expected_by_getitem)

def test_subclass_sparse_addition(self):
    """Adding two subclassed sparse series gives a subclassed result."""
    left = tm.SubclassedSparseSeries([1, 3, 5])
    right = tm.SubclassedSparseSeries([-2, 5, 12])

    total = left + right

    expected = tm.SubclassedSparseSeries([-1.0, 8.0, 17.0])
    tm.assert_sp_series_equal(total, expected)

def test_subclass_sparse_to_frame(self):
    """to_frame() on a subclassed sparse series gives the expected frame."""
    # NOTE(review): two values are supplied against a four-label index,
    # both here and in the expected frame -- confirm this is intended.
    series = tm.SubclassedSparseSeries([1, 2], index=list('abcd'),
                                       name='xxx')

    frame = series.to_frame()

    expected = tm.SubclassedSparseDataFrame({'xxx': [1, 2]},
                                            index=list('abcd'))
    tm.assert_sp_frame_equal(frame, expected)
73 changes: 68 additions & 5 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1322,7 +1322,8 @@ def assert_panelnd_equal(left, right,
check_less_precise=False,
assert_func=assert_frame_equal,
check_names=False,
by_blocks=False):
by_blocks=False,
obj='Panel'):
"""Check that left and right Panels are equal.

Parameters
Expand All @@ -1343,6 +1344,9 @@ def assert_panelnd_equal(left, right,
by_blocks : bool, default False
Specify how to compare internal data. If False, compare by columns.
If True, compare by blocks.
obj : str, default 'Panel'
Specify the object name being compared, internally used to show
the appropriate assertion message.
"""

if check_panel_type:
Expand Down Expand Up @@ -1404,10 +1408,30 @@ def assert_sp_array_equal(left, right):


def assert_sp_series_equal(left, right, exact_indices=True,
check_names=True, obj='SparseSeries'):
check_series_type=True,
check_names=True,
obj='SparseSeries'):
"""Check that the left and right SparseSeries are equal.

Parameters
----------
left : SparseSeries
right : SparseSeries
exact_indices : bool, default True
check_series_type : bool, default True
Whether to check the SparseSeries class is identical.
check_names : bool, default True
Whether to check the SparseSeries name attribute.
obj : str, default 'SparseSeries'
Specify the object name being compared, internally used to show
the appropriate assertion message.
"""
assertIsInstance(left, pd.SparseSeries, '[SparseSeries]')
assertIsInstance(right, pd.SparseSeries, '[SparseSeries]')

if check_series_type:
assert_class_equal(left, right, obj=obj)

assert_index_equal(left.index, right.index,
obj='{0}.index'.format(obj))

Expand All @@ -1421,14 +1445,29 @@ def assert_sp_series_equal(left, right, exact_indices=True,


def assert_sp_frame_equal(left, right, exact_indices=True,
check_frame_type=True,
obj='SparseDataFrame'):
"""
exact: Series SparseIndex objects must be exactly the same, otherwise just
compare dense representations
"""Check that the left and right SparseDataFrame are equal.

Parameters
----------
left : SparseDataFrame
right : SparseDataFrame
exact_indices : bool, default True
SparseSeries SparseIndex objects must be exactly the same,
otherwise just compare dense representations.
check_frame_type : bool, default True
Whether to check the SparseDataFrame class is identical.
obj : str, default 'SparseDataFrame'
Specify the object name being compared, internally used to show
the appropriate assertion message.
"""
assertIsInstance(left, pd.SparseDataFrame, '[SparseDataFrame]')
assertIsInstance(right, pd.SparseDataFrame, '[SparseDataFrame]')

if check_frame_type:
assert_class_equal(left, right, obj=obj)

assert_index_equal(left.index, right.index,
obj='{0}.index'.format(obj))
assert_index_equal(left.columns, right.columns,
Expand Down Expand Up @@ -2607,6 +2646,30 @@ def _constructor_sliced(self):
return SubclassedSeries


# Testing helper: a pd.SparseSeries subclass whose constructor hooks point
# back at the subclassed types, so tests can check that operations building
# their result through ``_constructor`` return the subclass.
class SubclassedSparseSeries(pd.SparseSeries):
# NOTE(review): presumably the attribute names carried over to results by
# ``__finalize__`` -- the frame slicing test asserts ``testattr`` survives
# slicing; confirm against the base class.
_metadata = ['testattr']

@property
def _constructor(self):
# Class used when a result of the same kind is constructed.
return SubclassedSparseSeries

@property
def _constructor_expanddim(self):
# Frame class paired with this series subclass (the to_frame test
# expects a SubclassedSparseDataFrame).
return SubclassedSparseDataFrame


# Testing helper: a pd.SparseDataFrame subclass whose constructor hooks
# point back at the subclassed types, so tests can check that slicing and
# transposing return the subclass.
class SubclassedSparseDataFrame(pd.SparseDataFrame):
# NOTE(review): presumably the attribute names carried over to results by
# ``__finalize__`` -- the slicing test sets ``testattr`` and asserts it is
# present on slices; confirm against the base class.
_metadata = ['testattr']

@property
def _constructor(self):
# Class used when a result of the same kind is constructed
# (``self._constructor(...)`` in the sparse frame methods).
return SubclassedSparseDataFrame

@property
def _constructor_sliced(self):
# Series class paired with this frame subclass (the slicing test
# compares single rows against SubclassedSparseSeries).
return SubclassedSparseSeries


@contextmanager
def patch(ob, attr, value):
"""Temporarily patch an attribute of an object.
Expand Down