@@ -1970,33 +1970,132 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
1970
1970
def info (self , verbose = None , buf = None , max_cols = None , memory_usage = None ,
1971
1971
null_counts = None ):
1972
1972
"""
1973
- Concise summary of a DataFrame.
1973
+ Print a concise summary of a DataFrame.
1974
+
1975
+ This method prints information about a DataFrame including
1976
+ the index dtype and column dtypes, non-null values and memory usage.
1974
1977
1975
1978
Parameters
1976
1979
----------
1977
- verbose : {None, True, False}, optional
1978
- Whether to print the full summary.
1979
- None follows the `display.max_info_columns` setting.
1980
- True or False overrides the `display.max_info_columns` setting.
1980
+ verbose : bool, optional
1981
+ Whether to print the full summary. By default, the setting in
1982
+ ``pandas.options.display.max_info_columns`` is followed.
1981
1983
buf : writable buffer, defaults to sys.stdout
1982
- max_cols : int, default None
1983
- Determines whether full summary or short summary is printed.
1984
- None follows the `display.max_info_columns` setting.
1985
- memory_usage : boolean/string, default None
1984
+ Where to send the output. By default, the output is printed to
1985
+ sys.stdout. Pass a writable buffer if you need to further process
1986
+ the output.
1987
+ max_cols : int, optional
1988
+ When to switch from the verbose to the truncated output. If the
1989
+ DataFrame has more than `max_cols` columns, the truncated output
1990
+ is used. By default, the setting in
1991
+ ``pandas.options.display.max_info_columns`` is used.
1992
+ memory_usage : bool, str, optional
1986
1993
Specifies whether total memory usage of the DataFrame
1987
- elements (including index) should be displayed. None follows
1988
- the `display.memory_usage` setting. True or False overrides
1989
- the `display.memory_usage` setting. A value of 'deep' is equivalent
1990
- of True, with deep introspection. Memory usage is shown in
1991
- human-readable units (base-2 representation).
1992
- null_counts : boolean, default None
1993
- Whether to show the non-null counts
1994
-
1995
- - If None, then only show if the frame is smaller than
1996
- max_info_rows and max_info_columns.
1997
- - If True, always show counts.
1998
- - If False, never show counts.
1994
+ elements (including the index) should be displayed. By default,
1995
+ this follows the ``pandas.options.display.memory_usage`` setting.
1996
+
1997
+ True always shows memory usage. False never shows memory usage.
1998
+ A value of 'deep' is equivalent to "True with deep introspection".
1999
+ Memory usage is shown in human-readable units (base-2
2000
+ representation). Without deep introspection a memory estimation is
2001
+ made based on column dtype and number of rows assuming values
2002
+ consume the same memory amount for corresponding dtypes. With deep
2003
+ memory introspection, a real memory usage calculation is performed
2004
+ at the cost of computational resources.
2005
+ null_counts : bool, optional
2006
+ Whether to show the non-null counts. By default, this is shown
2007
+ only if the frame is smaller than
2008
+ ``pandas.options.display.max_info_rows`` and
2009
+ ``pandas.options.display.max_info_columns``. A value of True always
2010
+ shows the counts, and False never shows the counts.
2011
+
2012
+ Returns
2013
+ -------
2014
+ None
2015
+ This method prints a summary of a DataFrame and returns None.
1999
2016
2017
+ See Also
2018
+ --------
2019
+ DataFrame.describe: Generate descriptive statistics of DataFrame
2020
+ columns.
2021
+ DataFrame.memory_usage: Memory usage of DataFrame columns.
2022
+
2023
+ Examples
2024
+ --------
2025
+ >>> int_values = [1, 2, 3, 4, 5]
2026
+ >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
2027
+ >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
2028
+ >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
2029
+ ... "float_col": float_values})
2030
+ >>> df
2031
+ int_col text_col float_col
2032
+ 0 1 alpha 0.00
2033
+ 1 2 beta 0.25
2034
+ 2 3 gamma 0.50
2035
+ 3 4 delta 0.75
2036
+ 4 5 epsilon 1.00
2037
+
2038
+ Prints information about all columns:
2039
+
2040
+ >>> df.info(verbose=True)
2041
+ <class 'pandas.core.frame.DataFrame'>
2042
+ RangeIndex: 5 entries, 0 to 4
2043
+ Data columns (total 3 columns):
2044
+ int_col 5 non-null int64
2045
+ text_col 5 non-null object
2046
+ float_col 5 non-null float64
2047
+ dtypes: float64(1), int64(1), object(1)
2048
+ memory usage: 200.0+ bytes
2049
+
2050
+ Prints a summary of the column count and dtypes, but not per-column
2051
+ information:
2052
+
2053
+ >>> df.info(verbose=False)
2054
+ <class 'pandas.core.frame.DataFrame'>
2055
+ RangeIndex: 5 entries, 0 to 4
2056
+ Columns: 3 entries, int_col to float_col
2057
+ dtypes: float64(1), int64(1), object(1)
2058
+ memory usage: 200.0+ bytes
2059
+
2060
+ Pipe output of DataFrame.info to a buffer instead of sys.stdout, get
2061
+ the buffer content and write it to a text file:
2062
+
2063
+ >>> import io
2064
+ >>> buffer = io.StringIO()
2065
+ >>> df.info(buf=buffer)
2066
+ >>> s = buffer.getvalue()
2067
+ >>> with open("df_info.txt", "w", encoding="utf-8") as f:
2068
+ ... f.write(s)
2069
+ 260
2070
+
2071
+ The `memory_usage` parameter allows deep introspection mode, especially
2072
+ useful for big DataFrames and for fine-tuning memory optimization:
2073
+
2074
+ >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
2075
+ >>> df = pd.DataFrame({
2076
+ ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2077
+ ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2078
+ ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
2079
+ ... })
2080
+ >>> df.info()
2081
+ <class 'pandas.core.frame.DataFrame'>
2082
+ RangeIndex: 1000000 entries, 0 to 999999
2083
+ Data columns (total 3 columns):
2084
+ column_1 1000000 non-null object
2085
+ column_2 1000000 non-null object
2086
+ column_3 1000000 non-null object
2087
+ dtypes: object(3)
2088
+ memory usage: 22.9+ MB
2089
+
2090
+ >>> df.info(memory_usage='deep')
2091
+ <class 'pandas.core.frame.DataFrame'>
2092
+ RangeIndex: 1000000 entries, 0 to 999999
2093
+ Data columns (total 3 columns):
2094
+ column_1 1000000 non-null object
2095
+ column_2 1000000 non-null object
2096
+ column_3 1000000 non-null object
2097
+ dtypes: object(3)
2098
+ memory usage: 188.8 MB
2000
2099
"""
2001
2100
2002
2101
if buf is None : # pragma: no cover
0 commit comments