Skip to content

Commit 64f9907

Browse files
[backport 2.3.x] BUG (string dtype): fix qualifier in memory usage info (#60221) (#60231)
(cherry picked from commit 0937c95)
1 parent 168e353 commit 64f9907

File tree

4 files changed

+52
-25
lines changed

4 files changed

+52
-25
lines changed

pandas/core/indexes/base.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -5326,7 +5326,9 @@ def _is_memory_usage_qualified(self) -> bool:
53265326
"""
53275327
Return a boolean if we need a qualified .info display.
53285328
"""
5329-
return is_object_dtype(self.dtype)
5329+
return is_object_dtype(self.dtype) or (
5330+
is_string_dtype(self.dtype) and self.dtype.storage == "python" # type: ignore[union-attr]
5331+
)
53305332

53315333
def __contains__(self, key: Any) -> bool:
53325334
"""

pandas/core/indexes/multi.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@
6565
is_list_like,
6666
is_object_dtype,
6767
is_scalar,
68+
is_string_dtype,
6869
pandas_dtype,
6970
)
7071
from pandas.core.dtypes.dtypes import (
@@ -1344,10 +1345,12 @@ def dtype(self) -> np.dtype:
13441345
def _is_memory_usage_qualified(self) -> bool:
13451346
"""return a boolean if we need a qualified .info display"""
13461347

1347-
def f(level) -> bool:
1348-
return "mixed" in level or "string" in level or "unicode" in level
1348+
def f(dtype) -> bool:
1349+
return is_object_dtype(dtype) or (
1350+
is_string_dtype(dtype) and dtype.storage == "python"
1351+
)
13491352

1350-
return any(f(level) for level in self._inferred_type_levels)
1353+
return any(f(level.dtype) for level in self.levels)
13511354

13521355
# Cannot determine type of "memory_usage"
13531356
@doc(Index.memory_usage) # type: ignore[has-type]

pandas/tests/frame/methods/test_info.py

+24 -10
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from pandas._config import using_string_dtype
1111

1212
from pandas.compat import (
13+
HAS_PYARROW,
1314
IS64,
1415
PYPY,
1516
)
@@ -435,18 +436,25 @@ def test_usage_via_getsizeof():
435436
assert abs(diff) < 100
436437

437438

438-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
439-
def test_info_memory_usage_qualified():
439+
def test_info_memory_usage_qualified(using_infer_string):
440440
buf = StringIO()
441441
df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
442442
df.info(buf=buf)
443443
assert "+" not in buf.getvalue()
444444

445445
buf = StringIO()
446-
df = DataFrame(1, columns=list("ab"), index=list("ABC"))
446+
df = DataFrame(1, columns=list("ab"), index=Index(list("ABC"), dtype=object))
447447
df.info(buf=buf)
448448
assert "+" in buf.getvalue()
449449

450+
buf = StringIO()
451+
df = DataFrame(1, columns=list("ab"), index=Index(list("ABC"), dtype="str"))
452+
df.info(buf=buf)
453+
if using_infer_string and HAS_PYARROW:
454+
assert "+" not in buf.getvalue()
455+
else:
456+
assert "+" in buf.getvalue()
457+
450458
buf = StringIO()
451459
df = DataFrame(
452460
1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)])
@@ -459,7 +467,10 @@ def test_info_memory_usage_qualified():
459467
1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]])
460468
)
461469
df.info(buf=buf)
462-
assert "+" in buf.getvalue()
470+
if using_infer_string and HAS_PYARROW:
471+
assert "+" not in buf.getvalue()
472+
else:
473+
assert "+" in buf.getvalue()
463474

464475

465476
def test_info_memory_usage_bug_on_multiindex():
@@ -496,16 +507,15 @@ def test_info_categorical():
496507
df.info(buf=buf)
497508

498509

499-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
500510
@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")
501-
def test_info_int_columns():
511+
def test_info_int_columns(using_infer_string):
502512
# GH#37245
503513
df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
504514
buf = StringIO()
505515
df.info(show_counts=True, buf=buf)
506516
result = buf.getvalue()
507517
expected = textwrap.dedent(
508-
"""\
518+
f"""\
509519
<class 'pandas.core.frame.DataFrame'>
510520
Index: 2 entries, A to B
511521
Data columns (total 2 columns):
@@ -514,19 +524,23 @@ def test_info_int_columns():
514524
0 1 2 non-null int64
515525
1 2 2 non-null int64
516526
dtypes: int64(2)
517-
memory usage: 48.0+ bytes
527+
memory usage: {'50.0' if using_infer_string and HAS_PYARROW else '48.0+'} bytes
518528
"""
519529
)
520530
assert result == expected
521531

522532

523533
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
524-
def test_memory_usage_empty_no_warning():
534+
def test_memory_usage_empty_no_warning(using_infer_string):
525535
# GH#50066
526536
df = DataFrame(index=["a", "b"])
527537
with tm.assert_produces_warning(None):
528538
result = df.memory_usage()
529-
expected = Series(16 if IS64 else 8, index=["Index"])
539+
if using_infer_string and HAS_PYARROW:
540+
value = 18
541+
else:
542+
value = 16 if IS64 else 8
543+
expected = Series(value, index=["Index"])
530544
tm.assert_series_equal(result, expected)
531545

532546

pandas/tests/series/methods/test_info.py

+19 -11
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,14 @@
77

88
from pandas._config import using_string_dtype
99

10-
from pandas.compat import PYPY
10+
from pandas.compat import (
11+
HAS_PYARROW,
12+
PYPY,
13+
)
1114

1215
from pandas import (
1316
CategoricalIndex,
17+
Index,
1418
MultiIndex,
1519
Series,
1620
date_range,
@@ -41,7 +45,9 @@ def test_info_categorical():
4145

4246

4347
@pytest.mark.parametrize("verbose", [True, False])
44-
def test_info_series(lexsorted_two_level_string_multiindex, verbose):
48+
def test_info_series(
49+
lexsorted_two_level_string_multiindex, verbose, using_infer_string
50+
):
4551
index = lexsorted_two_level_string_multiindex
4652
ser = Series(range(len(index)), index=index, name="sth")
4753
buf = StringIO()
@@ -63,10 +69,11 @@ def test_info_series(lexsorted_two_level_string_multiindex, verbose):
6369
10 non-null int64
6470
"""
6571
)
72+
qualifier = "" if using_infer_string and HAS_PYARROW else "+"
6673
expected += textwrap.dedent(
6774
f"""\
6875
dtypes: int64(1)
69-
memory usage: {ser.memory_usage()}.0+ bytes
76+
memory usage: {ser.memory_usage()}.0{qualifier} bytes
7077
"""
7178
)
7279
assert result == expected
@@ -142,20 +149,21 @@ def test_info_memory_usage_deep_pypy():
142149
assert s_object.memory_usage(deep=True) == s_object.memory_usage()
143150

144151

145-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
146152
@pytest.mark.parametrize(
147-
"series, plus",
153+
"index, plus",
148154
[
149-
(Series(1, index=[1, 2, 3]), False),
150-
(Series(1, index=list("ABC")), True),
151-
(Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
155+
([1, 2, 3], False),
156+
(Index(list("ABC"), dtype="str"), not (using_string_dtype() and HAS_PYARROW)),
157+
(Index(list("ABC"), dtype=object), True),
158+
(MultiIndex.from_product([range(3), range(3)]), False),
152159
(
153-
Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
154-
True,
160+
MultiIndex.from_product([range(3), ["foo", "bar"]]),
161+
not (using_string_dtype() and HAS_PYARROW),
155162
),
156163
],
157164
)
158-
def test_info_memory_usage_qualified(series, plus):
165+
def test_info_memory_usage_qualified(index, plus):
166+
series = Series(1, index=index)
159167
buf = StringIO()
160168
series.info(buf=buf)
161169
if plus:

0 commit comments

Comments
 (0)