Skip to content

Commit 0937c95

Browse files
BUG (string dtype): fix qualifier in memory usage info (#60221)
1 parent 7fe140e commit 0937c95

File tree

4 files changed

+48
-23
lines changed

4 files changed

+48
-23
lines changed

pandas/core/indexes/base.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -5139,7 +5139,9 @@ def _is_memory_usage_qualified(self) -> bool:
51395139
"""
51405140
Return a boolean if we need a qualified .info display.
51415141
"""
5142-
return is_object_dtype(self.dtype)
5142+
return is_object_dtype(self.dtype) or (
5143+
is_string_dtype(self.dtype) and self.dtype.storage == "python" # type: ignore[union-attr]
5144+
)
51435145

51445146
def __contains__(self, key: Any) -> bool:
51455147
"""

pandas/core/indexes/multi.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@
6666
is_list_like,
6767
is_object_dtype,
6868
is_scalar,
69+
is_string_dtype,
6970
pandas_dtype,
7071
)
7172
from pandas.core.dtypes.dtypes import (
@@ -1425,10 +1426,12 @@ def dtype(self) -> np.dtype:
14251426
def _is_memory_usage_qualified(self) -> bool:
14261427
"""return a boolean if we need a qualified .info display"""
14271428

1428-
def f(level) -> bool:
1429-
return "mixed" in level or "string" in level or "unicode" in level
1429+
def f(dtype) -> bool:
1430+
return is_object_dtype(dtype) or (
1431+
is_string_dtype(dtype) and dtype.storage == "python"
1432+
)
14301433

1431-
return any(f(level.inferred_type) for level in self.levels)
1434+
return any(f(level.dtype) for level in self.levels)
14321435

14331436
# Cannot determine type of "memory_usage"
14341437
@doc(Index.memory_usage) # type: ignore[has-type]

pandas/tests/frame/methods/test_info.py

+23 -13
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas._config import using_string_dtype
11-
1210
from pandas.compat import (
1311
HAS_PYARROW,
1412
IS64,
@@ -436,18 +434,25 @@ def test_usage_via_getsizeof():
436434
assert abs(diff) < 100
437435

438436

439-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
440-
def test_info_memory_usage_qualified():
437+
def test_info_memory_usage_qualified(using_infer_string):
441438
buf = StringIO()
442439
df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
443440
df.info(buf=buf)
444441
assert "+" not in buf.getvalue()
445442

446443
buf = StringIO()
447-
df = DataFrame(1, columns=list("ab"), index=list("ABC"))
444+
df = DataFrame(1, columns=list("ab"), index=Index(list("ABC"), dtype=object))
448445
df.info(buf=buf)
449446
assert "+" in buf.getvalue()
450447

448+
buf = StringIO()
449+
df = DataFrame(1, columns=list("ab"), index=Index(list("ABC"), dtype="str"))
450+
df.info(buf=buf)
451+
if using_infer_string and HAS_PYARROW:
452+
assert "+" not in buf.getvalue()
453+
else:
454+
assert "+" in buf.getvalue()
455+
451456
buf = StringIO()
452457
df = DataFrame(
453458
1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)])
@@ -460,7 +465,10 @@ def test_info_memory_usage_qualified():
460465
1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]])
461466
)
462467
df.info(buf=buf)
463-
assert "+" in buf.getvalue()
468+
if using_infer_string and HAS_PYARROW:
469+
assert "+" not in buf.getvalue()
470+
else:
471+
assert "+" in buf.getvalue()
464472

465473

466474
def test_info_memory_usage_bug_on_multiindex():
@@ -497,16 +505,15 @@ def test_info_categorical():
497505
df.info(buf=buf)
498506

499507

500-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
501508
@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")
502-
def test_info_int_columns():
509+
def test_info_int_columns(using_infer_string):
503510
# GH#37245
504511
df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
505512
buf = StringIO()
506513
df.info(show_counts=True, buf=buf)
507514
result = buf.getvalue()
508515
expected = textwrap.dedent(
509-
"""\
516+
f"""\
510517
<class 'pandas.DataFrame'>
511518
Index: 2 entries, A to B
512519
Data columns (total 2 columns):
@@ -515,19 +522,22 @@ def test_info_int_columns():
515522
0 1 2 non-null int64
516523
1 2 2 non-null int64
517524
dtypes: int64(2)
518-
memory usage: 48.0+ bytes
525+
memory usage: {'50.0' if using_infer_string and HAS_PYARROW else '48.0+'} bytes
519526
"""
520527
)
521528
assert result == expected
522529

523530

524-
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
525-
def test_memory_usage_empty_no_warning():
531+
def test_memory_usage_empty_no_warning(using_infer_string):
526532
# GH#50066
527533
df = DataFrame(index=["a", "b"])
528534
with tm.assert_produces_warning(None):
529535
result = df.memory_usage()
530-
expected = Series(16 if IS64 else 8, index=["Index"])
536+
if using_infer_string and HAS_PYARROW:
537+
value = 18
538+
else:
539+
value = 16 if IS64 else 8
540+
expected = Series(value, index=["Index"])
531541
tm.assert_series_equal(result, expected)
532542

533543

pandas/tests/series/methods/test_info.py

+16 -6
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,14 @@
77

88
from pandas._config import using_string_dtype
99

10-
from pandas.compat import PYPY
10+
from pandas.compat import (
11+
HAS_PYARROW,
12+
PYPY,
13+
)
1114

1215
from pandas import (
1316
CategoricalIndex,
17+
Index,
1418
MultiIndex,
1519
Series,
1620
date_range,
@@ -41,7 +45,9 @@ def test_info_categorical():
4145

4246

4347
@pytest.mark.parametrize("verbose", [True, False])
44-
def test_info_series(lexsorted_two_level_string_multiindex, verbose):
48+
def test_info_series(
49+
lexsorted_two_level_string_multiindex, verbose, using_infer_string
50+
):
4551
index = lexsorted_two_level_string_multiindex
4652
ser = Series(range(len(index)), index=index, name="sth")
4753
buf = StringIO()
@@ -63,10 +69,11 @@ def test_info_series(lexsorted_two_level_string_multiindex, verbose):
6369
10 non-null int64
6470
"""
6571
)
72+
qualifier = "" if using_infer_string and HAS_PYARROW else "+"
6673
expected += textwrap.dedent(
6774
f"""\
6875
dtypes: int64(1)
69-
memory usage: {ser.memory_usage()}.0+ bytes
76+
memory usage: {ser.memory_usage()}.0{qualifier} bytes
7077
"""
7178
)
7279
assert result == expected
@@ -142,14 +149,17 @@ def test_info_memory_usage_deep_pypy():
142149
assert s_object.memory_usage(deep=True) == s_object.memory_usage()
143150

144151

145-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
146152
@pytest.mark.parametrize(
147153
"index, plus",
148154
[
149155
([1, 2, 3], False),
150-
(list("ABC"), True),
156+
(Index(list("ABC"), dtype="str"), not (using_string_dtype() and HAS_PYARROW)),
157+
(Index(list("ABC"), dtype=object), True),
151158
(MultiIndex.from_product([range(3), range(3)]), False),
152-
(MultiIndex.from_product([range(3), ["foo", "bar"]]), True),
159+
(
160+
MultiIndex.from_product([range(3), ["foo", "bar"]]),
161+
not (using_string_dtype() and HAS_PYARROW),
162+
),
153163
],
154164
)
155165
def test_info_memory_usage_qualified(index, plus):

0 commit comments

Comments
 (0)