Skip to content

Commit 045a341

Browse files
committed
COMPAT: avoid calling getsizeof() on PyPy
1 parent 3e9e947 commit 045a341

File tree

7 files changed

+32
-20
lines changed

7 files changed

+32
-20
lines changed

pandas/core/base.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
2424
unique='IndexOpsMixin', duplicated='IndexOpsMixin')
2525

26+
import platform
27+
IS_PYPY = platform.python_implementation() == 'PyPy'
2628

2729
class StringMixin(object):
2830
"""implements string methods so long as object defines a `__unicode__`
@@ -1061,7 +1063,7 @@ def memory_usage(self, deep=False):
10611063
Notes
10621064
-----
10631065
Memory usage does not include memory consumed by elements that
1064-
are not components of the array if deep=False
1066+
are not components of the array if deep=False or if used on PyPy
10651067
10661068
See Also
10671069
--------
@@ -1071,9 +1073,8 @@ def memory_usage(self, deep=False):
10711073
return self.values.memory_usage(deep=deep)
10721074

10731075
v = self.values.nbytes
1074-
if deep and is_object_dtype(self):
1076+
if deep and is_object_dtype(self) and not IS_PYPY:
10751077
v += lib.memory_usage_of_objects(self.values)
1076-
10771078
return v
10781079

10791080
def factorize(self, sort=False, na_sentinel=-1):

pandas/core/indexes/multi.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,8 @@ def _nbytes(self, deep=False):
467467
"""
468468
level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
469469
label_nbytes = sum((i.nbytes for i in self.labels))
470-
names_nbytes = sum((getsizeof(i) for i in self.names))
470+
objsize = 24 # for implementations with no meaningful getsizeof (PyPy)
471+
names_nbytes = sum((getsizeof(i, 24) for i in self.names))
471472
result = level_nbytes + label_nbytes + names_nbytes
472473

473474
# include our engine hashtable

pandas/core/indexes/range.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,11 @@ def _format_data(self):
194194

195195
@cache_readonly
196196
def nbytes(self):
197-
""" return the number of bytes in the underlying data """
198-
return sum([getsizeof(getattr(self, v)) for v in
197+
""" return the number of bytes in the underlying data
198+
On implementations where this is problematic (PyPy)
199+
assume 24 bytes for each value
200+
"""
201+
return sum([getsizeof(getattr(self, v), 24) for v in
199202
['_start', '_stop', '_step']])
200203

201204
def memory_usage(self, deep=False):

pandas/tests/frame/test_repr_info.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -332,13 +332,14 @@ def test_info_memory_usage(self):
332332
res = buf.getvalue().splitlines()
333333
assert re.match(r"memory usage: [^+]+$", res[-1])
334334

335-
assert (df_with_object_index.memory_usage(
336-
index=True, deep=True).sum() > df_with_object_index.memory_usage(
337-
index=True).sum())
335+
if not tm.IS_PYPY:
336+
assert (df_with_object_index.memory_usage(
337+
index=True, deep=True).sum() > df_with_object_index.memory_usage(
338+
index=True).sum())
338339

339-
df_object = pd.DataFrame({'a': ['a']})
340-
assert (df_object.memory_usage(deep=True).sum() >
341-
df_object.memory_usage().sum())
340+
df_object = pd.DataFrame({'a': ['a']})
341+
assert (df_object.memory_usage(deep=True).sum() >
342+
df_object.memory_usage().sum())
342343

343344
# Test a DataFrame with duplicate columns
344345
dtypes = ['int64', 'int64', 'int64', 'float64']
@@ -377,10 +378,11 @@ def test_info_memory_usage(self):
377378
df.memory_usage(index=True)
378379
df.index.values.nbytes
379380

380-
# sys.getsizeof will call the .memory_usage with
381-
# deep=True, and add on some GC overhead
382-
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
383-
assert abs(diff) < 100
381+
if not tm.IS_PYPY:
382+
# sys.getsizeof will call the .memory_usage with
383+
# deep=True, and add on some GC overhead
384+
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
385+
assert abs(diff) < 100
384386

385387
def test_info_memory_usage_qualified(self):
386388

pandas/tests/test_base.py

+2
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def f():
144144

145145
pytest.raises(TypeError, f)
146146

147+
@pytest.mark.skipif(tm.IS_PYPY, "not relevant for PyPy")
147148
def test_memory_usage(self):
148149
# Delegate does not implement memory_usage.
149150
# Check that we fall back to in-built `__sizeof__`
@@ -941,6 +942,7 @@ def test_fillna(self):
941942
# check shallow_copied
942943
assert o is not result
943944

945+
@pytest.mark.skipif(tm.IS_PYPY, "not relevant for PyPy")
944946
def test_memory_usage(self):
945947
for o in self.objs:
946948
res = o.memory_usage()

pandas/tests/test_categorical.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1448,10 +1448,11 @@ def test_memory_usage(self):
14481448
cat = pd.Categorical(['foo', 'foo', 'bar'])
14491449
assert cat.memory_usage(deep=True) > cat.nbytes
14501450

1451-
# sys.getsizeof will call the .memory_usage with
1452-
# deep=True, and add on some GC overhead
1453-
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1454-
assert abs(diff) < 100
1451+
if not tm.IS_PYPY:
1452+
# sys.getsizeof will call the .memory_usage with
1453+
# deep=True, and add on some GC overhead
1454+
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1455+
assert abs(diff) < 100
14551456

14561457
def test_searchsorted(self):
14571458
# https://github.com/pandas-dev/pandas/issues/8420

pandas/util/testing.py

+2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
K = 4
5757
_RAISE_NETWORK_ERROR_DEFAULT = False
5858

59+
import platform
60+
IS_PYPY = platform.python_implementation() == 'PyPy'
5961

6062
# set testing_mode
6163
_testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)

0 commit comments

Comments
 (0)