Skip to content

Commit a01f74c

Browse files
hexgnu authored and jreback committed
BUG: don't assume series is length > 0
closes #19368

Author: Matthew Kirk <[email protected]>

Closes #19438 from hexgnu/segfault_memory_usage and squashes the following commits:

f9433d8 [Matthew Kirk] Use shared docstring and get rid of if condition
4ead141 [Matthew Kirk] Move whatsnew doc to Sparse
ae9f74d [Matthew Kirk] Revert base.py
cdd4141 [Matthew Kirk] Fix linting error
93a0c3d [Matthew Kirk] Merge remote-tracking branch 'upstream/master' into segfault_memory_usage
207bc74 [Matthew Kirk] Define memory_usage on SparseArray
21ae147 [Matthew Kirk] FIX: revert change to lib.pyx
3f52a44 [Matthew Kirk] Ah ha I think I got it
5e59e9c [Matthew Kirk] Use range over 0 <= for loops
e251587 [Matthew Kirk] Fix failing test with indexing
27df317 [Matthew Kirk] Merge remote-tracking branch 'upstream/master' into segfault_memory_usage
7fdd03e [Matthew Kirk] Take out comment and use product
6bd6ddd [Matthew Kirk] BUG: don't assume series is length > 0
1 parent f391cbf commit a01f74c

File tree

4 files changed

+29
-4
lines changed

4 files changed

+29
-4
lines changed

doc/source/whatsnew/v0.23.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -606,7 +606,7 @@ Sparse
606606

607607
- Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
608608
- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
609-
-
609+
- Bug in :class:`SparseSeries.memory_usage` which caused a segfault by accessing non-sparse elements (:issue:`19368`)
610610

611611
Reshaping
612612
^^^^^^^^^

pandas/core/base.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1048,7 +1048,7 @@ def is_monotonic_decreasing(self):
10481048

10491049
def memory_usage(self, deep=False):
10501050
"""
1051-
Memory usage of my values
1051+
Memory usage of the values
10521052
10531053
Parameters
10541054
----------

pandas/core/sparse/array.py

+14-2
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@
88
import warnings
99

1010
import pandas as pd
11-
from pandas.core.base import PandasObject
11+
from pandas.core.base import PandasObject, IndexOpsMixin
1212

1313
from pandas import compat
14-
from pandas.compat import range
14+
from pandas.compat import range, PYPY
1515
from pandas.compat.numpy import function as nv
1616

1717
from pandas.core.dtypes.generic import ABCSparseSeries
@@ -30,6 +30,7 @@
3030
from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
3131

3232
import pandas._libs.sparse as splib
33+
import pandas._libs.lib as lib
3334
from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex
3435
from pandas._libs import index as libindex
3536
import pandas.core.algorithms as algos
@@ -238,6 +239,17 @@ def kind(self):
238239
elif isinstance(self.sp_index, IntIndex):
239240
return 'integer'
240241

242+
@Appender(IndexOpsMixin.memory_usage.__doc__)
243+
def memory_usage(self, deep=False):
244+
values = self.sp_values
245+
246+
v = values.nbytes
247+
248+
if deep and is_object_dtype(self) and not PYPY:
249+
v += lib.memory_usage_of_objects(values)
250+
251+
return v
252+
241253
def __array_wrap__(self, out_arr, context=None):
242254
"""
243255
NumPy calls this method when ufunc is applied

pandas/tests/sparse/series/test_series.py

+13
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
from pandas.core.sparse.api import SparseSeries
2424
from pandas.tests.series.test_api import SharedWithSparse
2525

26+
from itertools import product
27+
2628

2729
def _test_data1():
2830
# nan-based
@@ -971,6 +973,17 @@ def test_combine_first(self):
971973
tm.assert_sp_series_equal(result, result2)
972974
tm.assert_sp_series_equal(result, expected)
973975

976+
@pytest.mark.parametrize('deep,fill_values', [([True, False],
977+
[0, 1, np.nan, None])])
978+
def test_memory_usage_deep(self, deep, fill_values):
979+
for deep, fill_value in product(deep, fill_values):
980+
sparse_series = SparseSeries(fill_values, fill_value=fill_value)
981+
dense_series = Series(fill_values)
982+
sparse_usage = sparse_series.memory_usage(deep=deep)
983+
dense_usage = dense_series.memory_usage(deep=deep)
984+
985+
assert sparse_usage < dense_usage
986+
974987

975988
class TestSparseHandlingMultiIndexes(object):
976989

0 commit comments

Comments
 (0)