Skip to content

Commit c8563a6

Browse files
committed
BUG: a qualifier (+) would always display with a MultiIndex, regardless
of whether it needed deep introspection for memory usage
1 parent 99f7cc1 commit c8563a6

File tree

5 files changed

+45
-2
lines changed

5 files changed

+45
-2
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ Bug Fixes
433433
- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
434434

435435
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`)
436-
436+
- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
437437
- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)
438438
- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`)
439439

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1752,7 +1752,8 @@ def _sizeof_fmt(num, size_qualifier):
17521752
# all cases (e.g., it misses categorical data even with object
17531753
# categories)
17541754
deep = False
1755-
if 'object' in counts or is_object_dtype(self.index):
1755+
if ('object' in counts or
1756+
self.index.is_memory_usage_qualified()):
17561757
size_qualifier = '+'
17571758
mem_usage = self.memory_usage(index=True, deep=deep).sum()
17581759
lines.append("memory usage: %s\n" %

pandas/indexes/base.py

+4
Original file line numberDiff line numberDiff line change
@@ -1429,6 +1429,10 @@ def inferred_type(self):
14291429
""" return a string of the type inferred from the values """
14301430
return lib.infer_dtype(self)
14311431

1432+
def is_memory_usage_qualified(self):
1433+
""" return a boolean if we need a qualified .info display """
1434+
return self.is_object()
1435+
14321436
def is_type_compatible(self, kind):
14331437
return kind == self.inferred_type
14341438

pandas/indexes/multi.py

+6
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,12 @@ def _shallow_copy(self, values=None, **kwargs):
428428
def dtype(self):
429429
return np.dtype('O')
430430

431+
def is_memory_usage_qualified(self):
432+
""" return a boolean if we need a qualified .info display """
433+
def f(l):
434+
return 'mixed' in l or 'string' in l or 'unicode' in l
435+
return any([f(l) for l in self._inferred_type_levels])
436+
431437
@Appender(Index.memory_usage.__doc__)
432438
def memory_usage(self, deep=False):
433439
# we are overwriting our base class to avoid

pandas/tests/frame/test_repr_info.py

+32
Original file line numberDiff line numberDiff line change
@@ -303,22 +303,26 @@ def test_info_memory_usage(self):
303303
data[i] = np.random.randint(2, size=n).astype(dtype)
304304
df = DataFrame(data)
305305
buf = StringIO()
306+
306307
# display memory usage case
307308
df.info(buf=buf, memory_usage=True)
308309
res = buf.getvalue().splitlines()
309310
self.assertTrue("memory usage: " in res[-1])
311+
310312
# do not display memory usage case
311313
df.info(buf=buf, memory_usage=False)
312314
res = buf.getvalue().splitlines()
313315
self.assertTrue("memory usage: " not in res[-1])
314316

315317
df.info(buf=buf, memory_usage=True)
316318
res = buf.getvalue().splitlines()
319+
317320
# memory usage is a lower bound, so print it as XYZ+ MB
318321
self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1]))
319322

320323
df.iloc[:, :5].info(buf=buf, memory_usage=True)
321324
res = buf.getvalue().splitlines()
325+
322326
# excluded column with object dtype, so estimate is accurate
323327
self.assertFalse(re.match(r"memory usage: [^+]+\+", res[-1]))
324328

@@ -382,6 +386,34 @@ def test_info_memory_usage(self):
382386
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
383387
self.assertTrue(abs(diff) < 100)
384388

389+
def test_info_memory_usage_qualified(self):
390+
391+
buf = StringIO()
392+
df = DataFrame(1, columns=list('ab'),
393+
index=[1, 2, 3])
394+
df.info(buf=buf)
395+
self.assertFalse('+' in buf.getvalue())
396+
397+
buf = StringIO()
398+
df = DataFrame(1, columns=list('ab'),
399+
index=list('ABC'))
400+
df.info(buf=buf)
401+
self.assertTrue('+' in buf.getvalue())
402+
403+
buf = StringIO()
404+
df = DataFrame(1, columns=list('ab'),
405+
index=pd.MultiIndex.from_product(
406+
[range(3), range(3)]))
407+
df.info(buf=buf)
408+
self.assertFalse('+' in buf.getvalue())
409+
410+
buf = StringIO()
411+
df = DataFrame(1, columns=list('ab'),
412+
index=pd.MultiIndex.from_product(
413+
[range(3), ['foo', 'bar']]))
414+
df.info(buf=buf)
415+
self.assertTrue('+' in buf.getvalue())
416+
385417
def test_info_memory_usage_bug_on_multiindex(self):
386418
# GH 14308
387419
# memory usage introspection should not materialize .values

0 commit comments

Comments
 (0)