From fcb4de1692e34047c343b02c31cf4f5150fd1cca Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Tue, 5 May 2020 11:15:40 +0900 Subject: [PATCH 1/3] BUG: memory_usage method with deep of StringArray is wrong --- pandas/core/arrays/string_.py | 6 ++++++ pandas/tests/extension/test_string.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index dbca8e74f5e1b..2dcdb343b5631 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -289,6 +289,12 @@ def value_counts(self, dropna=False): return value_counts(self._ndarray, dropna=dropna).astype("Int64") + def memory_usage(self, deep=False): + result = self._ndarray.nbytes + if deep: + return result + lib.memory_usage_of_objects(self._ndarray) + return result + # Override parent because we have different return types. @classmethod def _create_arithmetic_method(cls, op): diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 86aed671f1b88..b7a7fe136447a 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -85,6 +85,14 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) + def test_memory_usage(self, data): + # GH 33963 + series = pd.Series(data, dtype="string") + + assert ( + 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) + ) + class TestCasting(base.BaseCastingTests): pass From 697d98e38b968e19a4fc6aa35e75f93a319de76b Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Tue, 5 May 2020 21:41:20 +0900 Subject: [PATCH 2/3] move specific test to tests/arrays --- pandas/tests/arrays/string_/test_string.py | 7 +++++++ pandas/tests/extension/test_string.py | 8 -------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index fe770eed84b62..dc1a4bc585c1b 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -277,3 +277,10 @@ def test_value_counts_na(): result = arr.value_counts(dropna=True) expected = pd.Series([2, 1], index=["a", "b"], dtype="Int64") tm.assert_series_equal(result, expected) + + +def test_memory_usage(): + # GH 33963 + series = pd.Series(["a", "b", "c"], dtype="string") + + assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index b7a7fe136447a..86aed671f1b88 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -85,14 +85,6 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) - def test_memory_usage(self, data): - # GH 33963 - series = pd.Series(data, dtype="string") - - assert ( - 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) - ) - class TestCasting(base.BaseCastingTests): pass From 06d180262ebae6b958a61dca2f2776ee6ec8c46e Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Tue, 5 May 2020 21:54:25 +0900 Subject: [PATCH 3/3] add a note in bugfix section --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 82c43811c0444..9d7539d8cbde9 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -584,7 +584,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Fixed bug where :meth:`Serires.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`) -- +- Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`) Other