Skip to content

Commit 61b6a8e

Browse files
Marco Gorelli (MarcoGorelli)
Marco Gorelli
authored and committed
Move info and tests to their own files in IO/formats
1 parent 48cb5a9 commit 61b6a8e

File tree

6 files changed

+690
-666
lines changed

6 files changed

+690
-666
lines changed

pandas/conftest.py

+29
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,35 @@ def axis_series(request):
111111
return request.param
112112

113113

114+
@pytest.fixture
115+
def datetime_frame():
116+
"""
117+
Fixture for DataFrame of floats with DatetimeIndex
118+
119+
Columns are ['A', 'B', 'C', 'D']
120+
121+
A B C D
122+
2000-01-03 -1.122153 0.468535 0.122226 1.693711
123+
2000-01-04 0.189378 0.486100 0.007864 -1.216052
124+
2000-01-05 0.041401 -0.835752 -0.035279 -0.414357
125+
2000-01-06 0.430050 0.894352 0.090719 0.036939
126+
2000-01-07 -0.620982 -0.668211 -0.706153 1.466335
127+
2000-01-10 -0.752633 0.328434 -0.815325 0.699674
128+
2000-01-11 -2.236969 0.615737 -0.829076 -1.196106
129+
... ... ... ... ...
130+
2000-02-03 1.642618 -0.579288 0.046005 1.385249
131+
2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
132+
2000-02-07 -2.656149 -0.601387 1.410148 0.444150
133+
2000-02-08 -1.201881 -1.289040 0.772992 -1.445300
134+
2000-02-09 1.377373 0.398619 1.008453 -0.928207
135+
2000-02-10 0.473194 -0.636677 0.984058 0.511519
136+
2000-02-11 -0.965556 0.408313 -1.312844 -0.381948
137+
138+
[30 rows x 4 columns]
139+
"""
140+
return DataFrame(tm.getTimeSeriesData())
141+
142+
114143
@pytest.fixture
115144
def ip():
116145
"""

pandas/core/frame.py

+3-280
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import datetime
1515
from io import StringIO
1616
import itertools
17-
import sys
1817
from textwrap import dedent
1918
from typing import (
2019
IO,
@@ -131,7 +130,7 @@
131130

132131
from pandas.io.common import get_filepath_or_buffer
133132
from pandas.io.formats import console, format as fmt
134-
from pandas.io.formats.printing import pprint_thing
133+
from pandas.io.formats.info import info
135134
import pandas.plotting
136135

137136
if TYPE_CHECKING:
@@ -2226,282 +2225,10 @@ def to_html(
22262225
)
22272226

22282227
# ----------------------------------------------------------------------
2229-
22302228
def info(
22312229
self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
2232-
) -> None:
2233-
"""
2234-
Print a concise summary of a DataFrame.
2235-
2236-
This method prints information about a DataFrame including
2237-
the index dtype and column dtypes, non-null values and memory usage.
2238-
2239-
Parameters
2240-
----------
2241-
verbose : bool, optional
2242-
Whether to print the full summary. By default, the setting in
2243-
``pandas.options.display.max_info_columns`` is followed.
2244-
buf : writable buffer, defaults to sys.stdout
2245-
Where to send the output. By default, the output is printed to
2246-
sys.stdout. Pass a writable buffer if you need to further process
2247-
the output.
2248-
max_cols : int, optional
2249-
When to switch from the verbose to the truncated output. If the
2250-
DataFrame has more than `max_cols` columns, the truncated output
2251-
is used. By default, the setting in
2252-
``pandas.options.display.max_info_columns`` is used.
2253-
memory_usage : bool, str, optional
2254-
Specifies whether total memory usage of the DataFrame
2255-
elements (including the index) should be displayed. By default,
2256-
this follows the ``pandas.options.display.memory_usage`` setting.
2257-
2258-
True always show memory usage. False never shows memory usage.
2259-
A value of 'deep' is equivalent to "True with deep introspection".
2260-
Memory usage is shown in human-readable units (base-2
2261-
representation). Without deep introspection a memory estimation is
2262-
made based in column dtype and number of rows assuming values
2263-
consume the same memory amount for corresponding dtypes. With deep
2264-
memory introspection, a real memory usage calculation is performed
2265-
at the cost of computational resources.
2266-
null_counts : bool, optional
2267-
Whether to show the non-null counts. By default, this is shown
2268-
only if the frame is smaller than
2269-
``pandas.options.display.max_info_rows`` and
2270-
``pandas.options.display.max_info_columns``. A value of True always
2271-
shows the counts, and False never shows the counts.
2272-
2273-
Returns
2274-
-------
2275-
None
2276-
This method prints a summary of a DataFrame and returns None.
2277-
2278-
See Also
2279-
--------
2280-
DataFrame.describe: Generate descriptive statistics of DataFrame
2281-
columns.
2282-
DataFrame.memory_usage: Memory usage of DataFrame columns.
2283-
2284-
Examples
2285-
--------
2286-
>>> int_values = [1, 2, 3, 4, 5]
2287-
>>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
2288-
>>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
2289-
>>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
2290-
... "float_col": float_values})
2291-
>>> df
2292-
int_col text_col float_col
2293-
0 1 alpha 0.00
2294-
1 2 beta 0.25
2295-
2 3 gamma 0.50
2296-
3 4 delta 0.75
2297-
4 5 epsilon 1.00
2298-
2299-
Prints information of all columns:
2300-
2301-
>>> df.info(verbose=True)
2302-
<class 'pandas.core.frame.DataFrame'>
2303-
RangeIndex: 5 entries, 0 to 4
2304-
Data columns (total 3 columns):
2305-
# Column Non-Null Count Dtype
2306-
--- ------ -------------- -----
2307-
0 int_col 5 non-null int64
2308-
1 text_col 5 non-null object
2309-
2 float_col 5 non-null float64
2310-
dtypes: float64(1), int64(1), object(1)
2311-
memory usage: 248.0+ bytes
2312-
2313-
Prints a summary of columns count and its dtypes but not per column
2314-
information:
2315-
2316-
>>> df.info(verbose=False)
2317-
<class 'pandas.core.frame.DataFrame'>
2318-
RangeIndex: 5 entries, 0 to 4
2319-
Columns: 3 entries, int_col to float_col
2320-
dtypes: float64(1), int64(1), object(1)
2321-
memory usage: 248.0+ bytes
2322-
2323-
Pipe output of DataFrame.info to buffer instead of sys.stdout, get
2324-
buffer content and writes to a text file:
2325-
2326-
>>> import io
2327-
>>> buffer = io.StringIO()
2328-
>>> df.info(buf=buffer)
2329-
>>> s = buffer.getvalue()
2330-
>>> with open("df_info.txt", "w",
2331-
... encoding="utf-8") as f: # doctest: +SKIP
2332-
... f.write(s)
2333-
260
2334-
2335-
The `memory_usage` parameter allows deep introspection mode, specially
2336-
useful for big DataFrames and fine-tune memory optimization:
2337-
2338-
>>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
2339-
>>> df = pd.DataFrame({
2340-
... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2341-
... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2342-
... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
2343-
... })
2344-
>>> df.info()
2345-
<class 'pandas.core.frame.DataFrame'>
2346-
RangeIndex: 1000000 entries, 0 to 999999
2347-
Data columns (total 3 columns):
2348-
# Column Non-Null Count Dtype
2349-
--- ------ -------------- -----
2350-
0 column_1 1000000 non-null object
2351-
1 column_2 1000000 non-null object
2352-
2 column_3 1000000 non-null object
2353-
dtypes: object(3)
2354-
memory usage: 22.9+ MB
2355-
2356-
>>> df.info(memory_usage='deep')
2357-
<class 'pandas.core.frame.DataFrame'>
2358-
RangeIndex: 1000000 entries, 0 to 999999
2359-
Data columns (total 3 columns):
2360-
# Column Non-Null Count Dtype
2361-
--- ------ -------------- -----
2362-
0 column_1 1000000 non-null object
2363-
1 column_2 1000000 non-null object
2364-
2 column_3 1000000 non-null object
2365-
dtypes: object(3)
2366-
memory usage: 188.8 MB
2367-
"""
2368-
if buf is None: # pragma: no cover
2369-
buf = sys.stdout
2370-
2371-
lines = []
2372-
2373-
lines.append(str(type(self)))
2374-
lines.append(self.index._summary())
2375-
2376-
if len(self.columns) == 0:
2377-
lines.append(f"Empty {type(self).__name__}")
2378-
fmt.buffer_put_lines(buf, lines)
2379-
return
2380-
2381-
cols = self.columns
2382-
col_count = len(self.columns)
2383-
2384-
# hack
2385-
if max_cols is None:
2386-
max_cols = get_option("display.max_info_columns", len(self.columns) + 1)
2387-
2388-
max_rows = get_option("display.max_info_rows", len(self) + 1)
2389-
2390-
if null_counts is None:
2391-
show_counts = (col_count <= max_cols) and (len(self) < max_rows)
2392-
else:
2393-
show_counts = null_counts
2394-
exceeds_info_cols = col_count > max_cols
2395-
2396-
def _verbose_repr():
2397-
lines.append(f"Data columns (total {len(self.columns)} columns):")
2398-
2399-
id_head = " # "
2400-
column_head = "Column"
2401-
col_space = 2
2402-
2403-
max_col = max(len(pprint_thing(k)) for k in cols)
2404-
len_column = len(pprint_thing(column_head))
2405-
space = max(max_col, len_column) + col_space
2406-
2407-
max_id = len(pprint_thing(col_count))
2408-
len_id = len(pprint_thing(id_head))
2409-
space_num = max(max_id, len_id) + col_space
2410-
counts = None
2411-
2412-
header = _put_str(id_head, space_num) + _put_str(column_head, space)
2413-
if show_counts:
2414-
counts = self.count()
2415-
if len(cols) != len(counts): # pragma: no cover
2416-
raise AssertionError(
2417-
f"Columns must equal counts ({len(cols)} != {len(counts)})"
2418-
)
2419-
count_header = "Non-Null Count"
2420-
len_count = len(count_header)
2421-
non_null = " non-null"
2422-
max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null)
2423-
space_count = max(len_count, max_count) + col_space
2424-
count_temp = "{count}" + non_null
2425-
else:
2426-
count_header = ""
2427-
space_count = len(count_header)
2428-
len_count = space_count
2429-
count_temp = "{count}"
2430-
2431-
dtype_header = "Dtype"
2432-
len_dtype = len(dtype_header)
2433-
max_dtypes = max(len(pprint_thing(k)) for k in self.dtypes)
2434-
space_dtype = max(len_dtype, max_dtypes)
2435-
header += _put_str(count_header, space_count) + _put_str(
2436-
dtype_header, space_dtype
2437-
)
2438-
2439-
lines.append(header)
2440-
lines.append(
2441-
_put_str("-" * len_id, space_num)
2442-
+ _put_str("-" * len_column, space)
2443-
+ _put_str("-" * len_count, space_count)
2444-
+ _put_str("-" * len_dtype, space_dtype)
2445-
)
2446-
2447-
for i, col in enumerate(self.columns):
2448-
dtype = self.dtypes.iloc[i]
2449-
col = pprint_thing(col)
2450-
2451-
line_no = _put_str(f" {i}", space_num)
2452-
count = ""
2453-
if show_counts:
2454-
count = counts.iloc[i]
2455-
2456-
lines.append(
2457-
line_no
2458-
+ _put_str(col, space)
2459-
+ _put_str(count_temp.format(count=count), space_count)
2460-
+ _put_str(dtype, space_dtype)
2461-
)
2462-
2463-
def _non_verbose_repr():
2464-
lines.append(self.columns._summary(name="Columns"))
2465-
2466-
def _sizeof_fmt(num, size_qualifier):
2467-
# returns size in human readable format
2468-
for x in ["bytes", "KB", "MB", "GB", "TB"]:
2469-
if num < 1024.0:
2470-
return f"{num:3.1f}{size_qualifier} {x}"
2471-
num /= 1024.0
2472-
return f"{num:3.1f}{size_qualifier} PB"
2473-
2474-
if verbose:
2475-
_verbose_repr()
2476-
elif verbose is False: # specifically set to False, not nesc None
2477-
_non_verbose_repr()
2478-
else:
2479-
if exceeds_info_cols:
2480-
_non_verbose_repr()
2481-
else:
2482-
_verbose_repr()
2483-
2484-
counts = self._data.get_dtype_counts()
2485-
dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
2486-
lines.append(f"dtypes: {', '.join(dtypes)}")
2487-
2488-
if memory_usage is None:
2489-
memory_usage = get_option("display.memory_usage")
2490-
if memory_usage:
2491-
# append memory usage of df to display
2492-
size_qualifier = ""
2493-
if memory_usage == "deep":
2494-
deep = True
2495-
else:
2496-
# size_qualifier is just a best effort; not guaranteed to catch
2497-
# all cases (e.g., it misses categorical data even with object
2498-
# categories)
2499-
deep = False
2500-
if "object" in counts or self.index._is_memory_usage_qualified():
2501-
size_qualifier = "+"
2502-
mem_usage = self.memory_usage(index=True, deep=deep).sum()
2503-
lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
2504-
fmt.buffer_put_lines(buf, lines)
2230+
):
2231+
return info(self, verbose, buf, max_cols, memory_usage, null_counts)
25052232

25062233
def memory_usage(self, index=True, deep=False) -> Series:
25072234
"""
@@ -8606,7 +8333,3 @@ def _from_nested_dict(data):
86068333
new_data[col] = new_data.get(col, {})
86078334
new_data[col][index] = v
86088335
return new_data
8609-
8610-
8611-
def _put_str(s, space):
8612-
return str(s)[:space].ljust(space)

0 commit comments

Comments
 (0)