From 52cd4dc5f69d36a285d6bf9dd6db65e9b2b094e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Wed, 22 Mar 2023 02:12:45 +0000 Subject: [PATCH 1/8] BUG: Fix getsizeof when using Series(obj) and taking into account GC corrections Calling __sizeof__ to compute the size of an object produces an error, and in general, this method does not take Garbage Collector corrections into account. --- pandas/_libs/lib.pyx | 3 ++- pandas/tests/base/test_sizeof.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/base/test_sizeof.py diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9fcf6b28953b..051ced97125f2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1,6 +1,7 @@ from collections import abc from decimal import Decimal from enum import Enum +from sys import getsizeof from typing import ( Literal, _GenericAlias, @@ -159,7 +160,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t: n = len(arr) for i in range(n): - size += arr[i].__sizeof__() + size += getsizeof(arr[i]) return size diff --git a/pandas/tests/base/test_sizeof.py b/pandas/tests/base/test_sizeof.py new file mode 100644 index 0000000000000..ffbf95ad17c67 --- /dev/null +++ b/pandas/tests/base/test_sizeof.py @@ -0,0 +1,24 @@ +from sys import getsizeof + +from pandas import ( + DataFrame, + Series, +) + + +def test_sysof(): + assert getsizeof(DataFrame) == 1072 + assert getsizeof(DataFrame()) == 140 + assert getsizeof(DataFrame([])) == 140 + + +def test_sysof_series(): + assert getsizeof(Series) == 1200 + assert getsizeof(Series()) == 140 + getsizeof(Series(str)) + getsizeof(Series(int)) + getsizeof(Series(list)) + + +def test_memory_usage_series(): + Series(str).memory_usage(deep=True) From 802b442736bd9b5e10b67f032af6865b223eb3a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Wed, 22 Mar 2023 02:47:46 +0000 Subject: [PATCH 2/8] BUG: Fix getsizeof when using Series(obj) and taking 
into account GC corrections Calling __sizeof__ to compute the size of an object produces an error, and in general, this method does not take Garbage Collector corrections into account. --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c12807304f74d..ee83cbd98aa03 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -225,6 +225,7 @@ Styler Other ^^^^^ +- Bug in :meth:`Series.memory_usage` when ``deep=True`` raising an error with Series of objects and returning an incorrect value, as it did not take into account GC corrections (:issue:`51858`) .. ***DO NOT USE THIS SECTION*** From 2c4fa35e9f44c39d46b684ff9eaa3bc8d25c0997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Wed, 22 Mar 2023 15:10:01 +0000 Subject: [PATCH 3/8] Removing unnecessary tests that depend on OS bits --- pandas/tests/base/test_sizeof.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/base/test_sizeof.py b/pandas/tests/base/test_sizeof.py index ffbf95ad17c67..00b903d5212a7 100644 --- a/pandas/tests/base/test_sizeof.py +++ b/pandas/tests/base/test_sizeof.py @@ -7,14 +7,12 @@ def test_sysof(): - assert getsizeof(DataFrame) == 1072 - assert getsizeof(DataFrame()) == 140 - assert getsizeof(DataFrame([])) == 140 + getsizeof(DataFrame) + getsizeof(DataFrame()) + getsizeof(DataFrame([])) def test_sysof_series(): - assert getsizeof(Series) == 1200 - assert getsizeof(Series()) == 140 getsizeof(Series(str)) getsizeof(Series(int)) getsizeof(Series(list)) From c0e3fc53ff207a1328aeb016f5eab280350d1b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Wed, 22 Mar 2023 17:25:38 -0500 Subject: [PATCH 4/8] TST: Included python data types --- pandas/_testing/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 
c49dda2763c83..410cf7c6cbe3a 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -177,6 +177,23 @@ np.uint32, ] +PYTHON_DATA_TYPES = [ + str, + int, + float, + complex, + list, + tuple, + range, + dict, + set, + frozenset, + bool, + bytes, + bytearray, + memoryview, +] + ENDIAN = {"little": "<", "big": ">"}[byteorder] NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")] From b3a56a4044d3b1059aec0af2b9f48db818ccdd7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Wed, 22 Mar 2023 17:32:15 -0500 Subject: [PATCH 5/8] TST: Added more objects Series with objects for Memory test --- pandas/conftest.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 68f3c575ee93d..deb17fb05dbfa 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -760,6 +760,29 @@ def index_or_series_obj(request): return _index_or_series_objs[request.param].copy(deep=True) +_object_series = { + f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES +} + + +_index_or_series_memory_objs = { + **indices_dict, + **_series, + **_narrow_series, + **_object_series, +} + + +@pytest.fixture(params=_index_or_series_memory_objs.keys()) +def index_or_series_memory_obj(request): + """ + Fixture for tests on indexes, series, series with a narrow dtype and + series built from Python type objects. + Copied to avoid mutation, e.g. 
setting .name + """ + return _index_or_series_memory_objs[request.param].copy(deep=True) + + # ---------------------------------------------------------------- # DataFrames # ---------------------------------------------------------------- From 43ef8d21dd9868489d041184596da11f958e50d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Wed, 22 Mar 2023 17:33:16 -0500 Subject: [PATCH 6/8] TST: Added more objects Series with objects for Memory test --- pandas/tests/base/test_misc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 01705ca31adcd..362df635c13fd 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -82,8 +82,8 @@ def test_ndarray_compat_properties(index_or_series_obj): @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") -def test_memory_usage(index_or_series_obj): - obj = index_or_series_obj +def test_memory_usage(index_or_series_memory_obj): + obj = index_or_series_memory_obj # Clear index caches so that len(obj) == 0 report 0 memory usage if isinstance(obj, Series): is_ser = True From a7c5839ef99823386d418596c01b7285cc79b740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Wed, 22 Mar 2023 17:33:46 -0500 Subject: [PATCH 7/8] CLN: Deleted unnecessary tests --- pandas/tests/base/test_sizeof.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 pandas/tests/base/test_sizeof.py diff --git a/pandas/tests/base/test_sizeof.py b/pandas/tests/base/test_sizeof.py deleted file mode 100644 index 00b903d5212a7..0000000000000 --- a/pandas/tests/base/test_sizeof.py +++ /dev/null @@ -1,22 +0,0 @@ -from sys import getsizeof - -from pandas import ( - DataFrame, - Series, -) - - -def test_sysof(): - getsizeof(DataFrame) - getsizeof(DataFrame()) - getsizeof(DataFrame([])) - - -def test_sysof_series(): - getsizeof(Series(str)) - getsizeof(Series(int)) - getsizeof(Series(list)) - - 
-def test_memory_usage_series(): - Series(str).memory_usage(deep=True) From 03df8758bc748933facee6e97ad670aa730fe910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brayan=20Mu=C3=B1oz?= Date: Thu, 23 Mar 2023 14:14:59 -0500 Subject: [PATCH 8/8] TYP: Renamed _object_series to _typ_objects_series --- pandas/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index deb17fb05dbfa..22a38442dda59 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -760,7 +760,7 @@ def index_or_series_obj(request): return _index_or_series_objs[request.param].copy(deep=True) -_object_series = { +_typ_objects_series = { f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES } @@ -769,7 +769,7 @@ def index_or_series_obj(request): **indices_dict, **_series, **_narrow_series, - **_object_series, + **_typ_objects_series, }