Skip to content

Commit 9a5cd86

Browse files
author
Marco Gorelli
committed
Move info and tests to their own files in IO/formats
1 parent ebeb407 commit 9a5cd86

File tree

6 files changed

+690
-666
lines changed

6 files changed

+690
-666
lines changed

pandas/conftest.py

+29
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,35 @@ def axis_series(request):
112112
return request.param
113113

114114

115+
@pytest.fixture
116+
def datetime_frame():
117+
"""
118+
Fixture for DataFrame of floats with DatetimeIndex
119+
120+
Columns are ['A', 'B', 'C', 'D']
121+
122+
A B C D
123+
2000-01-03 -1.122153 0.468535 0.122226 1.693711
124+
2000-01-04 0.189378 0.486100 0.007864 -1.216052
125+
2000-01-05 0.041401 -0.835752 -0.035279 -0.414357
126+
2000-01-06 0.430050 0.894352 0.090719 0.036939
127+
2000-01-07 -0.620982 -0.668211 -0.706153 1.466335
128+
2000-01-10 -0.752633 0.328434 -0.815325 0.699674
129+
2000-01-11 -2.236969 0.615737 -0.829076 -1.196106
130+
... ... ... ... ...
131+
2000-02-03 1.642618 -0.579288 0.046005 1.385249
132+
2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
133+
2000-02-07 -2.656149 -0.601387 1.410148 0.444150
134+
2000-02-08 -1.201881 -1.289040 0.772992 -1.445300
135+
2000-02-09 1.377373 0.398619 1.008453 -0.928207
136+
2000-02-10 0.473194 -0.636677 0.984058 0.511519
137+
2000-02-11 -0.965556 0.408313 -1.312844 -0.381948
138+
139+
[30 rows x 4 columns]
140+
"""
141+
return DataFrame(tm.getTimeSeriesData())
142+
143+
115144
@pytest.fixture
116145
def ip():
117146
"""

pandas/core/frame.py

+3-280
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import datetime
1515
from io import StringIO
1616
import itertools
17-
import sys
1817
from textwrap import dedent
1918
from typing import (
2019
IO,
@@ -131,7 +130,7 @@
131130

132131
from pandas.io.common import get_filepath_or_buffer
133132
from pandas.io.formats import console, format as fmt
134-
from pandas.io.formats.printing import pprint_thing
133+
from pandas.io.formats.info import info
135134
import pandas.plotting
136135

137136
if TYPE_CHECKING:
@@ -2225,282 +2224,10 @@ def to_html(
22252224
)
22262225

22272226
# ----------------------------------------------------------------------
2228-
22292227
def info(
22302228
self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
2231-
) -> None:
2232-
"""
2233-
Print a concise summary of a DataFrame.
2234-
2235-
This method prints information about a DataFrame including
2236-
the index dtype and column dtypes, non-null values and memory usage.
2237-
2238-
Parameters
2239-
----------
2240-
verbose : bool, optional
2241-
Whether to print the full summary. By default, the setting in
2242-
``pandas.options.display.max_info_columns`` is followed.
2243-
buf : writable buffer, defaults to sys.stdout
2244-
Where to send the output. By default, the output is printed to
2245-
sys.stdout. Pass a writable buffer if you need to further process
2246-
the output.
2247-
max_cols : int, optional
2248-
When to switch from the verbose to the truncated output. If the
2249-
DataFrame has more than `max_cols` columns, the truncated output
2250-
is used. By default, the setting in
2251-
``pandas.options.display.max_info_columns`` is used.
2252-
memory_usage : bool, str, optional
2253-
Specifies whether total memory usage of the DataFrame
2254-
elements (including the index) should be displayed. By default,
2255-
this follows the ``pandas.options.display.memory_usage`` setting.
2256-
2257-
True always shows memory usage. False never shows memory usage.
2258-
A value of 'deep' is equivalent to "True with deep introspection".
2259-
Memory usage is shown in human-readable units (base-2
2260-
representation). Without deep introspection a memory estimation is
2261-
made based on column dtype and number of rows assuming values
2262-
consume the same memory amount for corresponding dtypes. With deep
2263-
memory introspection, a real memory usage calculation is performed
2264-
at the cost of computational resources.
2265-
null_counts : bool, optional
2266-
Whether to show the non-null counts. By default, this is shown
2267-
only if the frame is smaller than
2268-
``pandas.options.display.max_info_rows`` and
2269-
``pandas.options.display.max_info_columns``. A value of True always
2270-
shows the counts, and False never shows the counts.
2271-
2272-
Returns
2273-
-------
2274-
None
2275-
This method prints a summary of a DataFrame and returns None.
2276-
2277-
See Also
2278-
--------
2279-
DataFrame.describe: Generate descriptive statistics of DataFrame
2280-
columns.
2281-
DataFrame.memory_usage: Memory usage of DataFrame columns.
2282-
2283-
Examples
2284-
--------
2285-
>>> int_values = [1, 2, 3, 4, 5]
2286-
>>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
2287-
>>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
2288-
>>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
2289-
... "float_col": float_values})
2290-
>>> df
2291-
int_col text_col float_col
2292-
0 1 alpha 0.00
2293-
1 2 beta 0.25
2294-
2 3 gamma 0.50
2295-
3 4 delta 0.75
2296-
4 5 epsilon 1.00
2297-
2298-
Prints information of all columns:
2299-
2300-
>>> df.info(verbose=True)
2301-
<class 'pandas.core.frame.DataFrame'>
2302-
RangeIndex: 5 entries, 0 to 4
2303-
Data columns (total 3 columns):
2304-
# Column Non-Null Count Dtype
2305-
--- ------ -------------- -----
2306-
0 int_col 5 non-null int64
2307-
1 text_col 5 non-null object
2308-
2 float_col 5 non-null float64
2309-
dtypes: float64(1), int64(1), object(1)
2310-
memory usage: 248.0+ bytes
2311-
2312-
Prints a summary of the column count and dtypes but not per-column
2313-
information:
2314-
2315-
>>> df.info(verbose=False)
2316-
<class 'pandas.core.frame.DataFrame'>
2317-
RangeIndex: 5 entries, 0 to 4
2318-
Columns: 3 entries, int_col to float_col
2319-
dtypes: float64(1), int64(1), object(1)
2320-
memory usage: 248.0+ bytes
2321-
2322-
Pipe output of DataFrame.info to buffer instead of sys.stdout, get
2323-
buffer content and write it to a text file:
2324-
2325-
>>> import io
2326-
>>> buffer = io.StringIO()
2327-
>>> df.info(buf=buffer)
2328-
>>> s = buffer.getvalue()
2329-
>>> with open("df_info.txt", "w",
2330-
... encoding="utf-8") as f: # doctest: +SKIP
2331-
... f.write(s)
2332-
260
2333-
2334-
The `memory_usage` parameter allows deep introspection mode, especially
2335-
useful for big DataFrames and for fine-tuning memory optimization:
2336-
2337-
>>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
2338-
>>> df = pd.DataFrame({
2339-
... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2340-
... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2341-
... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
2342-
... })
2343-
>>> df.info()
2344-
<class 'pandas.core.frame.DataFrame'>
2345-
RangeIndex: 1000000 entries, 0 to 999999
2346-
Data columns (total 3 columns):
2347-
# Column Non-Null Count Dtype
2348-
--- ------ -------------- -----
2349-
0 column_1 1000000 non-null object
2350-
1 column_2 1000000 non-null object
2351-
2 column_3 1000000 non-null object
2352-
dtypes: object(3)
2353-
memory usage: 22.9+ MB
2354-
2355-
>>> df.info(memory_usage='deep')
2356-
<class 'pandas.core.frame.DataFrame'>
2357-
RangeIndex: 1000000 entries, 0 to 999999
2358-
Data columns (total 3 columns):
2359-
# Column Non-Null Count Dtype
2360-
--- ------ -------------- -----
2361-
0 column_1 1000000 non-null object
2362-
1 column_2 1000000 non-null object
2363-
2 column_3 1000000 non-null object
2364-
dtypes: object(3)
2365-
memory usage: 188.8 MB
2366-
"""
2367-
if buf is None: # pragma: no cover
2368-
buf = sys.stdout
2369-
2370-
lines = []
2371-
2372-
lines.append(str(type(self)))
2373-
lines.append(self.index._summary())
2374-
2375-
if len(self.columns) == 0:
2376-
lines.append(f"Empty {type(self).__name__}")
2377-
fmt.buffer_put_lines(buf, lines)
2378-
return
2379-
2380-
cols = self.columns
2381-
col_count = len(self.columns)
2382-
2383-
# hack
2384-
if max_cols is None:
2385-
max_cols = get_option("display.max_info_columns", len(self.columns) + 1)
2386-
2387-
max_rows = get_option("display.max_info_rows", len(self) + 1)
2388-
2389-
if null_counts is None:
2390-
show_counts = (col_count <= max_cols) and (len(self) < max_rows)
2391-
else:
2392-
show_counts = null_counts
2393-
exceeds_info_cols = col_count > max_cols
2394-
2395-
def _verbose_repr():
2396-
lines.append(f"Data columns (total {len(self.columns)} columns):")
2397-
2398-
id_head = " # "
2399-
column_head = "Column"
2400-
col_space = 2
2401-
2402-
max_col = max(len(pprint_thing(k)) for k in cols)
2403-
len_column = len(pprint_thing(column_head))
2404-
space = max(max_col, len_column) + col_space
2405-
2406-
max_id = len(pprint_thing(col_count))
2407-
len_id = len(pprint_thing(id_head))
2408-
space_num = max(max_id, len_id) + col_space
2409-
counts = None
2410-
2411-
header = _put_str(id_head, space_num) + _put_str(column_head, space)
2412-
if show_counts:
2413-
counts = self.count()
2414-
if len(cols) != len(counts): # pragma: no cover
2415-
raise AssertionError(
2416-
f"Columns must equal counts ({len(cols)} != {len(counts)})"
2417-
)
2418-
count_header = "Non-Null Count"
2419-
len_count = len(count_header)
2420-
non_null = " non-null"
2421-
max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null)
2422-
space_count = max(len_count, max_count) + col_space
2423-
count_temp = "{count}" + non_null
2424-
else:
2425-
count_header = ""
2426-
space_count = len(count_header)
2427-
len_count = space_count
2428-
count_temp = "{count}"
2429-
2430-
dtype_header = "Dtype"
2431-
len_dtype = len(dtype_header)
2432-
max_dtypes = max(len(pprint_thing(k)) for k in self.dtypes)
2433-
space_dtype = max(len_dtype, max_dtypes)
2434-
header += _put_str(count_header, space_count) + _put_str(
2435-
dtype_header, space_dtype
2436-
)
2437-
2438-
lines.append(header)
2439-
lines.append(
2440-
_put_str("-" * len_id, space_num)
2441-
+ _put_str("-" * len_column, space)
2442-
+ _put_str("-" * len_count, space_count)
2443-
+ _put_str("-" * len_dtype, space_dtype)
2444-
)
2445-
2446-
for i, col in enumerate(self.columns):
2447-
dtype = self.dtypes.iloc[i]
2448-
col = pprint_thing(col)
2449-
2450-
line_no = _put_str(f" {i}", space_num)
2451-
count = ""
2452-
if show_counts:
2453-
count = counts.iloc[i]
2454-
2455-
lines.append(
2456-
line_no
2457-
+ _put_str(col, space)
2458-
+ _put_str(count_temp.format(count=count), space_count)
2459-
+ _put_str(dtype, space_dtype)
2460-
)
2461-
2462-
def _non_verbose_repr():
2463-
lines.append(self.columns._summary(name="Columns"))
2464-
2465-
def _sizeof_fmt(num, size_qualifier):
2466-
# returns size in human readable format
2467-
for x in ["bytes", "KB", "MB", "GB", "TB"]:
2468-
if num < 1024.0:
2469-
return f"{num:3.1f}{size_qualifier} {x}"
2470-
num /= 1024.0
2471-
return f"{num:3.1f}{size_qualifier} PB"
2472-
2473-
if verbose:
2474-
_verbose_repr()
2475-
elif verbose is False: # specifically set to False, not necessarily None
2476-
_non_verbose_repr()
2477-
else:
2478-
if exceeds_info_cols:
2479-
_non_verbose_repr()
2480-
else:
2481-
_verbose_repr()
2482-
2483-
counts = self._data.get_dtype_counts()
2484-
dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
2485-
lines.append(f"dtypes: {', '.join(dtypes)}")
2486-
2487-
if memory_usage is None:
2488-
memory_usage = get_option("display.memory_usage")
2489-
if memory_usage:
2490-
# append memory usage of df to display
2491-
size_qualifier = ""
2492-
if memory_usage == "deep":
2493-
deep = True
2494-
else:
2495-
# size_qualifier is just a best effort; not guaranteed to catch
2496-
# all cases (e.g., it misses categorical data even with object
2497-
# categories)
2498-
deep = False
2499-
if "object" in counts or self.index._is_memory_usage_qualified():
2500-
size_qualifier = "+"
2501-
mem_usage = self.memory_usage(index=True, deep=deep).sum()
2502-
lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
2503-
fmt.buffer_put_lines(buf, lines)
2229+
):
2230+
return info(self, verbose, buf, max_cols, memory_usage, null_counts)
25042231

25052232
def memory_usage(self, index=True, deep=False) -> Series:
25062233
"""
@@ -8623,7 +8350,3 @@ def _from_nested_dict(data):
86238350
new_data[col] = new_data.get(col, {})
86248351
new_data[col][index] = v
86258352
return new_data
8626-
8627-
8628-
def _put_str(s, space):
8629-
return str(s)[:space].ljust(space)

0 commit comments

Comments
 (0)