diff --git a/doc/source/release.rst b/doc/source/release.rst index 0666eb7f88675..bd141d4d905e1 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -72,6 +72,7 @@ Improvements to existing features - perf improvements in Series datetime/timedelta binary operations (:issue:`5801`) - `option_context` context manager now available as top-level API (:issue:`5752`) - df.info() view now display dtype info per column (:issue: `5682`) + - df.info() now honors option max_info_rows, disabling null counts for large frames (:issue:`5974`) - perf improvements in DataFrame ``count/dropna`` for ``axis=1`` - Series.str.contains now has a `regex=False` keyword which can be faster for plain (non-regex) string patterns. (:issue: `5879`) - support ``dtypes`` on ``Panel`` diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index e4d4ea74ac169..c617c58c527a8 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -166,11 +166,9 @@ pc_max_info_rows_doc = """ : int or None - Deprecated. -""" - -pc_max_info_rows_deprecation_warning = """\ -max_info_rows has been deprecated, as reprs no longer use the info view. + df.info() will usually show null-counts for each column. + For large frames this can be quite slow. max_info_rows and max_info_cols + limit this null check only to frames with smaller dimensions than specified. 
""" pc_large_repr_doc = """ @@ -266,9 +264,6 @@ def mpl_style_cb(key): msg=pc_height_deprecation_warning, rkey='display.max_rows') -cf.deprecate_option('display.max_info_rows', - msg=pc_max_info_rows_deprecation_warning) - tc_sim_interactive_doc = """ : boolean Whether to simulate interactive mode for purposes of testing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 61d59e8f93c83..01f854aabb8d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1419,20 +1419,35 @@ def info(self, verbose=True, buf=None, max_cols=None): max_cols = get_option( 'display.max_info_columns', len(self.columns) + 1) - if verbose and len(self.columns) <= max_cols: + max_rows = get_option('display.max_info_rows', len(self) + 1) + + show_counts = ((len(self.columns) <= max_cols) and + (len(self) < max_rows)) + if verbose: lines.append('Data columns (total %d columns):' % len(self.columns)) space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4 - counts = self.count() - if len(cols) != len(counts): # pragma: no cover - raise AssertionError('Columns must equal counts (%d != %d)' % - (len(cols), len(counts))) + counts = None + + tmpl = "%s%s" + if show_counts: + counts = self.count() + if len(cols) != len(counts): # pragma: no cover + raise AssertionError('Columns must equal counts (%d != %d)' % + (len(cols), len(counts))) + tmpl = "%s non-null %s" + dtypes = self.dtypes - for col, count in compat.iteritems(counts): + for i, col in enumerate(self.columns): dtype = dtypes[col] col = com.pprint_thing(col) + + count= "" + if show_counts: + count = counts[i] + lines.append(_put_str(col, space) + - '%d non-null %s' % (count, dtype)) + tmpl % (count, dtype)) else: lines.append(self.columns.summary(name='Columns'))