Skip to content

BUG: don't assume series is length > 0 #19438

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ Sparse

- Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
- Bug in :meth:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
-
- Bug in :meth:`SparseArray.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`)

Reshaping
^^^^^^^^^
Expand Down
38 changes: 37 additions & 1 deletion pandas/core/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas.core.base import PandasObject

from pandas import compat
from pandas.compat import range
from pandas.compat import range, PYPY
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.generic import ABCSparseSeries
Expand All @@ -30,6 +30,7 @@
from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype

import pandas._libs.sparse as splib
import pandas._libs.lib as lib
from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex
from pandas._libs import index as libindex
import pandas.core.algorithms as algos
Expand Down Expand Up @@ -238,6 +239,41 @@ def kind(self):
elif isinstance(self.sp_index, IntIndex):
return 'integer'

def memory_usage(self, deep=False):
    """Memory usage of the SparseArray.

    Parameters
    ----------
    deep : bool, default False
        If True, introspect the data deeply by interrogating
        ``object`` dtypes for system-level memory consumption.

    Returns
    -------
    scalar bytes of memory consumed

    Notes
    -----
    Memory usage does not include memory of empty cells filled by
    ``fill_value``.  If ``deep=False``, it also does not include
    memory consumed by objects that the array elements merely
    reference.

    See Also
    --------
    Series.memory_usage
    """
    # sp_values is always an ndarray, so its nbytes accounts for the
    # stored (non-fill) elements only.  Summing over sp_values rather
    # than the logical array avoids touching the virtual fill cells,
    # which previously caused a segfault (GH 19368).
    values = self.sp_values
    v = values.nbytes

    # Deep introspection of object elements relies on
    # sys.getsizeof-style accounting, which is not meaningful on
    # PyPy, so it is skipped there.
    if deep and is_object_dtype(self) and not PYPY:
        v += lib.memory_usage_of_objects(values)

    return v

def __array_wrap__(self, out_arr, context=None):
"""
NumPy calls this method when ufunc is applied
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from pandas.core.sparse.api import SparseSeries
from pandas.tests.series.test_api import SharedWithSparse

from itertools import product


def _test_data1():
# nan-based
Expand Down Expand Up @@ -971,6 +973,17 @@ def test_combine_first(self):
tm.assert_sp_series_equal(result, result2)
tm.assert_sp_series_equal(result, expected)

@pytest.mark.parametrize('deep', [True, False])
@pytest.mark.parametrize('fill_value', [0, 1, np.nan, None])
def test_memory_usage_deep(self, deep, fill_value):
    # GH 19368: SparseArray.memory_usage must not touch the virtual
    # fill cells, so a sparse series should always report strictly
    # less memory than its dense counterpart.
    # Stacked parametrize decorators give the full deep x fill_value
    # product as independent test cases (instead of one test looping
    # over itertools.product, where the first failure masks the rest).
    values = [0, 1, np.nan, None]
    sparse_series = SparseSeries(values, fill_value=fill_value)
    dense_series = Series(values)
    sparse_usage = sparse_series.memory_usage(deep=deep)
    dense_usage = dense_series.memory_usage(deep=deep)

    assert sparse_usage < dense_usage


class TestSparseHandlingMultiIndexes(object):

Expand Down