Skip to content

Commit 44e9fef

Browse files
committed
COMPAT: avoid calling getsizeof() on PyPy
1 parent 3e9e947 commit 44e9fef

File tree

9 files changed

+49
-25
lines changed

9 files changed

+49
-25
lines changed

doc/source/whatsnew/v0.21.0.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,10 @@ Bug Fixes
307307
Conversion
308308
^^^^^^^^^^
309309

310-
- Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`)
310+
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
311311
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
312+
- Add a ``default`` argument to ``sys.getsizeof`` used in ``memory_usage`` to support PyPy. Objects on PyPy do not have a
313+
fixed size, so an approximate guess is used instead (:issue:`17228`)
312314

313315

314316
Indexing

pandas/compat/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from distutils.version import LooseVersion
3232
from itertools import product
3333
import sys
34+
import platform
3435
import types
3536
from unicodedata import east_asian_width
3637
import struct
@@ -41,6 +42,7 @@
4142
PY3 = (sys.version_info[0] >= 3)
4243
PY35 = (sys.version_info >= (3, 5))
4344
PY36 = (sys.version_info >= (3, 6))
45+
PYPY = (platform.python_implementation() == 'PyPy')
4446

4547
try:
4648
import __builtin__ as builtins

pandas/core/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pandas.core.nanops as nanops
1616
import pandas._libs.lib as lib
1717
from pandas.compat.numpy import function as nv
18+
from pandas.compat import PYPY
1819
from pandas.util._decorators import (Appender, cache_readonly,
1920
deprecate_kwarg, Substitution)
2021
from pandas.core.common import AbstractMethodError
@@ -1061,7 +1062,7 @@ def memory_usage(self, deep=False):
10611062
Notes
10621063
-----
10631064
Memory usage does not include memory consumed by elements that
1064-
are not components of the array if deep=False
1065+
are not components of the array if deep=False or if used on PyPy
10651066
10661067
See Also
10671068
--------
@@ -1071,9 +1072,8 @@ def memory_usage(self, deep=False):
10711072
return self.values.memory_usage(deep=deep)
10721073

10731074
v = self.values.nbytes
1074-
if deep and is_object_dtype(self):
1075+
if deep and is_object_dtype(self) and not PYPY:
10751076
v += lib.memory_usage_of_objects(self.values)
1076-
10771077
return v
10781078

10791079
def factorize(self, sort=False, na_sentinel=-1):

pandas/core/indexes/multi.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,8 @@ def _nbytes(self, deep=False):
467467
"""
468468
level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
469469
label_nbytes = sum((i.nbytes for i in self.labels))
470-
names_nbytes = sum((getsizeof(i) for i in self.names))
470+
objsize = 24 # for implementations with no useful getsizeof (PyPy)
471+
names_nbytes = sum((getsizeof(i, objsize) for i in self.names))
471472
result = level_nbytes + label_nbytes + names_nbytes
472473

473474
# include our engine hashtable

pandas/core/indexes/range.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,11 @@ def _format_data(self):
194194

195195
@cache_readonly
196196
def nbytes(self):
197-
""" return the number of bytes in the underlying data """
198-
return sum([getsizeof(getattr(self, v)) for v in
197+
""" return the number of bytes in the underlying data
198+
On implementations where this is problematic (PyPy)
199+
assume 24 bytes for each value
200+
"""
201+
return sum([getsizeof(getattr(self, v), 24) for v in
199202
['_start', '_stop', '_step']])
200203

201204
def memory_usage(self, deep=False):

pandas/tests/frame/test_repr_info.py

+25-11
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pytest
1212

1313
from pandas import (DataFrame, compat, option_context)
14-
from pandas.compat import StringIO, lrange, u
14+
from pandas.compat import StringIO, lrange, u, PYPY
1515
import pandas.io.formats.format as fmt
1616
import pandas as pd
1717

@@ -332,13 +332,25 @@ def test_info_memory_usage(self):
332332
res = buf.getvalue().splitlines()
333333
assert re.match(r"memory usage: [^+]+$", res[-1])
334334

335-
assert (df_with_object_index.memory_usage(
336-
index=True, deep=True).sum() > df_with_object_index.memory_usage(
337-
index=True).sum())
335+
if PYPY:
336+
assert (df_with_object_index.memory_usage(
337+
index=True, deep=True).sum() ==
338+
df_with_object_index.memory_usage(
339+
index=True).sum())
338340

339-
df_object = pd.DataFrame({'a': ['a']})
340-
assert (df_object.memory_usage(deep=True).sum() >
341-
df_object.memory_usage().sum())
341+
df_object = pd.DataFrame({'a': ['a']})
342+
assert (df_object.memory_usage(deep=True).sum() ==
343+
df_object.memory_usage().sum())
344+
345+
else:
346+
assert (df_with_object_index.memory_usage(
347+
index=True, deep=True).sum() >
348+
df_with_object_index.memory_usage(
349+
index=True).sum())
350+
351+
df_object = pd.DataFrame({'a': ['a']})
352+
assert (df_object.memory_usage(deep=True).sum() >
353+
df_object.memory_usage().sum())
342354

343355
# Test a DataFrame with duplicate columns
344356
dtypes = ['int64', 'int64', 'int64', 'float64']
@@ -377,10 +389,12 @@ def test_info_memory_usage(self):
377389
df.memory_usage(index=True)
378390
df.index.values.nbytes
379391

380-
# sys.getsizeof will call the .memory_usage with
381-
# deep=True, and add on some GC overhead
382-
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
383-
assert abs(diff) < 100
392+
mem = df.memory_usage(deep=True).sum()
393+
if not PYPY:
394+
# sys.getsizeof will call the .memory_usage with
395+
# deep=True, and add on some GC overhead
396+
diff = mem - sys.getsizeof(df)
397+
assert abs(diff) < 100
384398

385399
def test_info_memory_usage_qualified(self):
386400

pandas/tests/test_base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import pandas.util.testing as tm
1616
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
1717
Timedelta, IntervalIndex, Interval)
18-
from pandas.compat import StringIO
18+
from pandas.compat import StringIO, PYPY
1919
from pandas.compat.numpy import np_array_datetime64_compat
2020
from pandas.core.base import PandasDelegate, NoNewAttributesMixin
2121
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
@@ -144,6 +144,7 @@ def f():
144144

145145
pytest.raises(TypeError, f)
146146

147+
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
147148
def test_memory_usage(self):
148149
# Delegate does not implement memory_usage.
149150
# Check that we fall back to in-built `__sizeof__`
@@ -941,6 +942,7 @@ def test_fillna(self):
941942
# check shallow_copied
942943
assert o is not result
943944

945+
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
944946
def test_memory_usage(self):
945947
for o in self.objs:
946948
res = o.memory_usage()

pandas/tests/test_categorical.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
period_range, PeriodIndex,
2525
timedelta_range, TimedeltaIndex, NaT,
2626
Interval, IntervalIndex)
27-
from pandas.compat import range, lrange, u, PY3
27+
from pandas.compat import range, lrange, u, PY3, PYPY
2828
from pandas.core.config import option_context
2929

3030

@@ -1448,10 +1448,11 @@ def test_memory_usage(self):
14481448
cat = pd.Categorical(['foo', 'foo', 'bar'])
14491449
assert cat.memory_usage(deep=True) > cat.nbytes
14501450

1451-
# sys.getsizeof will call the .memory_usage with
1452-
# deep=True, and add on some GC overhead
1453-
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1454-
assert abs(diff) < 100
1451+
if not PYPY:
1452+
# sys.getsizeof will call the .memory_usage with
1453+
# deep=True, and add on some GC overhead
1454+
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1455+
assert abs(diff) < 100
14551456

14561457
def test_searchsorted(self):
14571458
# https://github.com/pandas-dev/pandas/issues/8420

pandas/util/testing.py

-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
K = 4
5757
_RAISE_NETWORK_ERROR_DEFAULT = False
5858

59-
6059
# set testing_mode
6160
_testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)
6261

0 commit comments

Comments
 (0)