Skip to content

Commit 08d2e0c

Browse files
committed
COMPAT: avoid calling getsizeof() on PyPy
1 parent 3e9e947 commit 08d2e0c

File tree

8 files changed

+45
-20
lines changed

8 files changed

+45
-20
lines changed

pandas/compat/__init__.py

+2
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
from distutils.version import LooseVersion
3232
from itertools import product
3333
import sys
34+
import platform
3435
import types
3536
from unicodedata import east_asian_width
3637
import struct
@@ -41,6 +42,7 @@
4142
PY3 = (sys.version_info[0] >= 3)
4243
PY35 = (sys.version_info >= (3, 5))
4344
PY36 = (sys.version_info >= (3, 6))
45+
PYPY = (platform.python_implementation() == 'PyPy')
4446

4547
try:
4648
import __builtin__ as builtins

pandas/core/base.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
import pandas.core.nanops as nanops
1616
import pandas._libs.lib as lib
1717
from pandas.compat.numpy import function as nv
18+
from pandas.compat import PYPY
1819
from pandas.util._decorators import (Appender, cache_readonly,
1920
deprecate_kwarg, Substitution)
2021
from pandas.core.common import AbstractMethodError
@@ -1061,7 +1062,7 @@ def memory_usage(self, deep=False):
10611062
Notes
10621063
-----
10631064
Memory usage does not include memory consumed by elements that
1064-
are not components of the array if deep=False
1065+
are not components of the array if deep=False or if used on PyPy
10651066
10661067
See Also
10671068
--------
@@ -1071,9 +1072,8 @@ def memory_usage(self, deep=False):
10711072
return self.values.memory_usage(deep=deep)
10721073

10731074
v = self.values.nbytes
1074-
if deep and is_object_dtype(self):
1075+
if deep and is_object_dtype(self) and not PYPY:
10751076
v += lib.memory_usage_of_objects(self.values)
1076-
10771077
return v
10781078

10791079
def factorize(self, sort=False, na_sentinel=-1):

pandas/core/indexes/multi.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -467,7 +467,8 @@ def _nbytes(self, deep=False):
467467
"""
468468
level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
469469
label_nbytes = sum((i.nbytes for i in self.labels))
470-
names_nbytes = sum((getsizeof(i) for i in self.names))
470+
objsize = 24 # for implementations with no useful getsizeof (PyPy)
471+
names_nbytes = sum((getsizeof(i, objsize) for i in self.names))
471472
result = level_nbytes + label_nbytes + names_nbytes
472473

473474
# include our engine hashtable

pandas/core/indexes/range.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -194,8 +194,11 @@ def _format_data(self):
194194

195195
@cache_readonly
196196
def nbytes(self):
197-
""" return the number of bytes in the underlying data """
198-
return sum([getsizeof(getattr(self, v)) for v in
197+
""" return the number of bytes in the underlying data
198+
On implementations where this is problematic (PyPy)
199+
assume 24 bytes for each value
200+
"""
201+
return sum([getsizeof(getattr(self, v), 24) for v in
199202
['_start', '_stop', '_step']])
200203

201204
def memory_usage(self, deep=False):

pandas/tests/frame/test_repr_info.py

+24-10
Original file line number | Diff line number | Diff line change
@@ -332,13 +332,25 @@ def test_info_memory_usage(self):
332332
res = buf.getvalue().splitlines()
333333
assert re.match(r"memory usage: [^+]+$", res[-1])
334334

335-
assert (df_with_object_index.memory_usage(
336-
index=True, deep=True).sum() > df_with_object_index.memory_usage(
337-
index=True).sum())
335+
if tm.IS_PYPY:
336+
assert (df_with_object_index.memory_usage(
337+
index=True, deep=True).sum() ==
338+
df_with_object_index.memory_usage(
339+
index=True).sum())
338340

339-
df_object = pd.DataFrame({'a': ['a']})
340-
assert (df_object.memory_usage(deep=True).sum() >
341-
df_object.memory_usage().sum())
341+
df_object = pd.DataFrame({'a': ['a']})
342+
assert (df_object.memory_usage(deep=True).sum() ==
343+
df_object.memory_usage().sum())
344+
345+
else:
346+
assert (df_with_object_index.memory_usage(
347+
index=True, deep=True).sum() >
348+
df_with_object_index.memory_usage(
349+
index=True).sum())
350+
351+
df_object = pd.DataFrame({'a': ['a']})
352+
assert (df_object.memory_usage(deep=True).sum() >
353+
df_object.memory_usage().sum())
342354

343355
# Test a DataFrame with duplicate columns
344356
dtypes = ['int64', 'int64', 'int64', 'float64']
@@ -377,10 +389,12 @@ def test_info_memory_usage(self):
377389
df.memory_usage(index=True)
378390
df.index.values.nbytes
379391

380-
# sys.getsizeof will call the .memory_usage with
381-
# deep=True, and add on some GC overhead
382-
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
383-
assert abs(diff) < 100
392+
mem = df.memory_usage(deep=True).sum()
393+
if not tm.IS_PYPY:
394+
# sys.getsizeof will call the .memory_usage with
395+
# deep=True, and add on some GC overhead
396+
diff = mem - sys.getsizeof(df)
397+
assert abs(diff) < 100
384398

385399
def test_info_memory_usage_qualified(self):
386400

pandas/tests/test_base.py

+2
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@ def f():
144144

145145
pytest.raises(TypeError, f)
146146

147+
@pytest.mark.skipif(tm.IS_PYPY, reason="not relevant for PyPy")
147148
def test_memory_usage(self):
148149
# Delegate does not implement memory_usage.
149150
# Check that we fall back to in-built `__sizeof__`
@@ -941,6 +942,7 @@ def test_fillna(self):
941942
# check shallow_copied
942943
assert o is not result
943944

945+
@pytest.mark.skipif(tm.IS_PYPY, reason="not relevant for PyPy")
944946
def test_memory_usage(self):
945947
for o in self.objs:
946948
res = o.memory_usage()

pandas/tests/test_categorical.py

+5-4
Original file line number | Diff line number | Diff line change
@@ -1448,10 +1448,11 @@ def test_memory_usage(self):
14481448
cat = pd.Categorical(['foo', 'foo', 'bar'])
14491449
assert cat.memory_usage(deep=True) > cat.nbytes
14501450

1451-
# sys.getsizeof will call the .memory_usage with
1452-
# deep=True, and add on some GC overhead
1453-
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1454-
assert abs(diff) < 100
1451+
if not tm.IS_PYPY:
1452+
# sys.getsizeof will call the .memory_usage with
1453+
# deep=True, and add on some GC overhead
1454+
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1455+
assert abs(diff) < 100
14551456

14561457
def test_searchsorted(self):
14571458
# https://github.com/pandas-dev/pandas/issues/8420

pandas/util/testing.py

+2
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,8 @@
5656
K = 4
5757
_RAISE_NETWORK_ERROR_DEFAULT = False
5858

59+
import platform
60+
IS_PYPY = platform.python_implementation() == 'PyPy'
5961

6062
# set testing_mode
6163
_testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)

0 commit comments

Comments
 (0)