BUG: a qualifier (+) would always display with a MultiIndex, regardless

jreback · jreback · commit c8563a620e74 · 2017-01-31T16:22:36.000-05:00
of whether it needed deep introspection for memory usage
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -433,7 +433,7 @@ Bug Fixes
 - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
 
 - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`)
-
+- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
 - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)
 - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1752,7 +1752,8 @@ def _sizeof_fmt(num, size_qualifier):
                 # all cases (e.g., it misses categorical data even with object
                 # categories)
                 deep = False
-                if 'object' in counts or is_object_dtype(self.index):
+                if ('object' in counts or
+                        self.index.is_memory_usage_qualified()):
                     size_qualifier = '+'
             mem_usage = self.memory_usage(index=True, deep=deep).sum()
             lines.append("memory usage: %s\n" %
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
@@ -1429,6 +1429,10 @@ def inferred_type(self):
         """ return a string of the type inferred from the values """
         return lib.infer_dtype(self)
 
+    def is_memory_usage_qualified(self):
+        """ return a boolean if we need a qualified .info display """
+        return self.is_object()
+
     def is_type_compatible(self, kind):
         return kind == self.inferred_type
 
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
@@ -428,6 +428,12 @@ def _shallow_copy(self, values=None, **kwargs):
     def dtype(self):
         return np.dtype('O')
 
+    def is_memory_usage_qualified(self):
+        """ return a boolean if we need a qualified .info display """
+        def f(l):
+            return 'mixed' in l or 'string' in l or 'unicode' in l
+        return any([f(l) for l in self._inferred_type_levels])
+
     @Appender(Index.memory_usage.__doc__)
     def memory_usage(self, deep=False):
         # we are overwriting our base class to avoid
diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
@@ -303,22 +303,26 @@ def test_info_memory_usage(self):
             data[i] = np.random.randint(2, size=n).astype(dtype)
         df = DataFrame(data)
         buf = StringIO()
+
         # display memory usage case
         df.info(buf=buf, memory_usage=True)
         res = buf.getvalue().splitlines()
         self.assertTrue("memory usage: " in res[-1])
+
         # do not display memory usage cas
         df.info(buf=buf, memory_usage=False)
         res = buf.getvalue().splitlines()
         self.assertTrue("memory usage: " not in res[-1])
 
         df.info(buf=buf, memory_usage=True)
         res = buf.getvalue().splitlines()
+
         # memory usage is a lower bound, so print it as XYZ+ MB
         self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1]))
 
         df.iloc[:, :5].info(buf=buf, memory_usage=True)
         res = buf.getvalue().splitlines()
+
         # excluded column with object dtype, so estimate is accurate
         self.assertFalse(re.match(r"memory usage: [^+]+\+", res[-1]))
 
@@ -382,6 +386,34 @@ def test_info_memory_usage(self):
         diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
         self.assertTrue(abs(diff) < 100)
 
+    def test_info_memory_usage_qualified(self):
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=[1, 2, 3])
+        df.info(buf=buf)
+        self.assertFalse('+' in buf.getvalue())
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=list('ABC'))
+        df.info(buf=buf)
+        self.assertTrue('+' in buf.getvalue())
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=pd.MultiIndex.from_product(
+                           [range(3), range(3)]))
+        df.info(buf=buf)
+        self.assertFalse('+' in buf.getvalue())
+
+        buf = StringIO()
+        df = DataFrame(1, columns=list('ab'),
+                       index=pd.MultiIndex.from_product(
+                           [range(3), ['foo', 'bar']]))
+        df.info(buf=buf)
+        self.assertTrue('+' in buf.getvalue())
+
     def test_info_memory_usage_bug_on_multiindex(self):
         # GH 14308
         # memory usage introspection should not materialize .values