diff --git a/doc/source/release.rst b/doc/source/release.rst index 50c79b8e8a2bf..973dfd73307f2 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -515,6 +515,7 @@ Bug Fixes - Bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the `large_repr` set to 'info' (:issue:`7105`) - Bug in ``DatetimeIndex`` specifying ``freq`` raises ``ValueError`` when passed value is too short (:issue:`7098`) +- Fixed a bug with the `info` repr not honoring the `display.max_info_columns` setting (:issue:`6939`) pandas 0.13.1 ------------- diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 4c98cb685c901..8182bff92fb63 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -210,6 +210,7 @@ API changes # this now raises for arith ops like ``+``, ``*``, etc. NotImplementedError: operator '*' not implemented for bool dtypes + .. _whatsnew_0140.display: Display Changes @@ -239,6 +240,11 @@ Display Changes length of the series (:issue:`7101`) - Fixed a bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the `large_repr` set to 'info' (:issue:`7105`) +- The `verbose` keyword in ``DataFrame.info()``, which controls whether to shorten the ``info`` + representation, is now ``None`` by default. This will follow the global setting in + ``display.max_info_columns``. The global setting can be overridden with ``verbose=True`` or + ``verbose=False``. +- Fixed a bug with the `info` repr not honoring the `display.max_info_columns` setting (:issue:`6939`) .. 
_whatsnew_0140.groupby: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d20cec7aa79ee..1b77a87b0d94a 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1392,17 +1392,20 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, if buf is None: return formatter.buf.getvalue() - def info(self, verbose=True, buf=None, max_cols=None): + def info(self, verbose=None, buf=None, max_cols=None): """ Concise summary of a DataFrame. Parameters ---------- - verbose : boolean, default True - If False, don't print column count summary + verbose : {None, True, False}, optional + Whether to print the full summary. + None follows the `display.max_info_columns` setting. + True or False overrides the `display.max_info_columns` setting. buf : writable buffer, defaults to sys.stdout max_cols : int, default None - Determines whether full summary or short summary is printed + Determines whether full summary or short summary is printed. + None follows the `display.max_info_columns` setting. 
""" from pandas.core.format import _put_lines @@ -1429,8 +1432,10 @@ def info(self, verbose=True, buf=None, max_cols=None): max_rows = get_option('display.max_info_rows', len(self) + 1) show_counts = ((len(self.columns) <= max_cols) and - (len(self) < max_rows)) - if verbose: + (len(self) < max_rows)) + exceeds_info_cols = len(self.columns) > max_cols + + def _verbose_repr(): lines.append('Data columns (total %d columns):' % len(self.columns)) space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4 @@ -1442,22 +1447,33 @@ def info(self, verbose=True, buf=None, max_cols=None): if len(cols) != len(counts): # pragma: no cover raise AssertionError('Columns must equal counts (%d != %d)' % (len(cols), len(counts))) - tmpl = "%s non-null %s" + tmpl = "%s non-null %s" dtypes = self.dtypes for i, col in enumerate(self.columns): dtype = dtypes[col] col = com.pprint_thing(col) - count= "" + count = "" if show_counts: count = counts.iloc[i] lines.append(_put_str(col, space) + tmpl % (count, dtype)) - else: + + def _non_verbose_repr(): lines.append(self.columns.summary(name='Columns')) + if verbose: + _verbose_repr() + elif verbose is False: # specifically set to False, not nesc None + _non_verbose_repr() + else: + if exceeds_info_cols: + _non_verbose_repr() + else: + _verbose_repr() + counts = self.get_dtype_counts() dtypes = ['%s(%d)' % k for k in sorted(compat.iteritems(counts))] lines.append('dtypes: %s' % ', '.join(dtypes)) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index f26ea0755ad46..f61bda686c88b 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -38,6 +38,12 @@ def has_info_repr(df): c2 = r.split('\n')[0].startswith(r"<class") # _repr_html_ return c1 or c2 +def has_non_verbose_info_repr(df): + has_info = has_info_repr(df) + r = repr(df) + nv = len(r.split('\n')) == 4 # 1. , 2. Index, 3. Columns, 4. 
dtype + return has_info and nv + def has_horizontally_truncated_repr(df): r = repr(df) return any(l.strip().endswith('...') for l in r.splitlines()) @@ -1573,6 +1579,22 @@ def test_info_repr(self): with option_context('display.large_repr', 'info'): assert has_info_repr(df) + def test_info_repr_max_cols(self): + # GH #6939 + df = DataFrame(randn(10, 5)) + with option_context('display.large_repr', 'info', + 'display.max_columns', 1, + 'display.max_info_columns', 4): + self.assertTrue(has_non_verbose_info_repr(df)) + + with option_context('display.large_repr', 'info', + 'display.max_columns', 1, + 'display.max_info_columns', 5): + self.assertFalse(has_non_verbose_info_repr(df)) + + # test verbose overrides + # fmt.set_option('display.max_info_columns', 4) # exceeded + def test_info_repr_html(self): max_rows = get_option('display.max_rows') max_cols = get_option('display.max_columns') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0c1745b41f089..8266feb112ed2 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -6326,6 +6326,41 @@ def test_info_shows_column_dtypes(self): name = '%d %d non-null %s' % (i, n, dtype) assert name in res + def test_info_max_cols(self): + df = DataFrame(np.random.randn(10, 5)) + for len_, verbose in [(4, None), (4, False), (9, True)]: + # For verbose always ^ setting ^ summarize ^ full output + with pd.option_context('max_info_columns', 4): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + self.assertEqual(len(res.split('\n')), len_) + + for len_, verbose in [(9, None), (4, False), (9, True)]: + + # max_cols no exceeded + with pd.option_context('max_info_columns', 5): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + self.assertEqual(len(res.split('\n')), len_) + + for len_, max_cols in [(9, 5), (4, 4)]: + # setting truncates + with pd.option_context('max_info_columns', 4): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + 
res = buf.getvalue() + self.assertEqual(len(res.split('\n')), len_) + + # setting wouldn't truncate + with pd.option_context('max_info_columns', 5): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + res = buf.getvalue() + self.assertEqual(len(res.split('\n')), len_) + + def test_dtypes(self): self.mixed_frame['bool'] = self.mixed_frame['A'] > 0 result = self.mixed_frame.dtypes