|
14 | 14 | import datetime
|
15 | 15 | from io import StringIO
|
16 | 16 | import itertools
|
17 |
| -import sys |
18 | 17 | from textwrap import dedent
|
19 | 18 | from typing import (
|
20 | 19 | IO,
|
|
130 | 129 |
|
131 | 130 | from pandas.io.common import get_filepath_or_buffer
|
132 | 131 | from pandas.io.formats import console, format as fmt
|
133 |
| -from pandas.io.formats.printing import pprint_thing |
| 132 | +from pandas.io.formats.info import info |
134 | 133 | import pandas.plotting
|
135 | 134 |
|
136 | 135 | if TYPE_CHECKING:
|
@@ -2225,282 +2224,10 @@ def to_html(
|
2225 | 2224 | )
|
2226 | 2225 |
|
2227 | 2226 | # ----------------------------------------------------------------------
|
2228 |
| - |
2229 | 2227 | def info(
|
2230 | 2228 | self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
|
2231 |
| - ) -> None: |
2232 |
| - """ |
2233 |
| - Print a concise summary of a DataFrame. |
2234 |
| -
|
2235 |
| - This method prints information about a DataFrame including |
2236 |
| - the index dtype and column dtypes, non-null values and memory usage. |
2237 |
| -
|
2238 |
| - Parameters |
2239 |
| - ---------- |
2240 |
| - verbose : bool, optional |
2241 |
| - Whether to print the full summary. By default, the setting in |
2242 |
| - ``pandas.options.display.max_info_columns`` is followed. |
2243 |
| - buf : writable buffer, defaults to sys.stdout |
2244 |
| - Where to send the output. By default, the output is printed to |
2245 |
| - sys.stdout. Pass a writable buffer if you need to further process |
2246 |
| - the output. |
2247 |
| - max_cols : int, optional |
2248 |
| - When to switch from the verbose to the truncated output. If the |
2249 |
| - DataFrame has more than `max_cols` columns, the truncated output |
2250 |
| - is used. By default, the setting in |
2251 |
| - ``pandas.options.display.max_info_columns`` is used. |
2252 |
| - memory_usage : bool, str, optional |
2253 |
| - Specifies whether total memory usage of the DataFrame |
2254 |
| - elements (including the index) should be displayed. By default, |
2255 |
| - this follows the ``pandas.options.display.memory_usage`` setting. |
2256 |
| -
|
2257 |
| - True always shows memory usage. False never shows memory usage. |
2258 |
| - A value of 'deep' is equivalent to "True with deep introspection". |
2259 |
| - Memory usage is shown in human-readable units (base-2 |
2260 |
| - representation). Without deep introspection a memory estimation is |
2261 |
| - made based on column dtype and number of rows assuming values |
2262 |
| - consume the same memory amount for corresponding dtypes. With deep |
2263 |
| - memory introspection, a real memory usage calculation is performed |
2264 |
| - at the cost of computational resources. |
2265 |
| - null_counts : bool, optional |
2266 |
| - Whether to show the non-null counts. By default, this is shown |
2267 |
| - only if the frame is smaller than |
2268 |
| - ``pandas.options.display.max_info_rows`` and |
2269 |
| - ``pandas.options.display.max_info_columns``. A value of True always |
2270 |
| - shows the counts, and False never shows the counts. |
2271 |
| -
|
2272 |
| - Returns |
2273 |
| - ------- |
2274 |
| - None |
2275 |
| - This method prints a summary of a DataFrame and returns None. |
2276 |
| -
|
2277 |
| - See Also |
2278 |
| - -------- |
2279 |
| - DataFrame.describe: Generate descriptive statistics of DataFrame |
2280 |
| - columns. |
2281 |
| - DataFrame.memory_usage: Memory usage of DataFrame columns. |
2282 |
| -
|
2283 |
| - Examples |
2284 |
| - -------- |
2285 |
| - >>> int_values = [1, 2, 3, 4, 5] |
2286 |
| - >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] |
2287 |
| - >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] |
2288 |
| - >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, |
2289 |
| - ... "float_col": float_values}) |
2290 |
| - >>> df |
2291 |
| - int_col text_col float_col |
2292 |
| - 0 1 alpha 0.00 |
2293 |
| - 1 2 beta 0.25 |
2294 |
| - 2 3 gamma 0.50 |
2295 |
| - 3 4 delta 0.75 |
2296 |
| - 4 5 epsilon 1.00 |
2297 |
| -
|
2298 |
| - Prints information of all columns: |
2299 |
| -
|
2300 |
| - >>> df.info(verbose=True) |
2301 |
| - <class 'pandas.core.frame.DataFrame'> |
2302 |
| - RangeIndex: 5 entries, 0 to 4 |
2303 |
| - Data columns (total 3 columns): |
2304 |
| - # Column Non-Null Count Dtype |
2305 |
| - --- ------ -------------- ----- |
2306 |
| - 0 int_col 5 non-null int64 |
2307 |
| - 1 text_col 5 non-null object |
2308 |
| - 2 float_col 5 non-null float64 |
2309 |
| - dtypes: float64(1), int64(1), object(1) |
2310 |
| - memory usage: 248.0+ bytes |
2311 |
| -
|
2312 |
| - Prints a summary of the column count and dtypes, but not per-column |
2313 |
| - information: |
2314 |
| -
|
2315 |
| - >>> df.info(verbose=False) |
2316 |
| - <class 'pandas.core.frame.DataFrame'> |
2317 |
| - RangeIndex: 5 entries, 0 to 4 |
2318 |
| - Columns: 3 entries, int_col to float_col |
2319 |
| - dtypes: float64(1), int64(1), object(1) |
2320 |
| - memory usage: 248.0+ bytes |
2321 |
| -
|
2322 |
| - Pipe output of DataFrame.info to buffer instead of sys.stdout, get |
2323 |
| - buffer content and write it to a text file: |
2324 |
| -
|
2325 |
| - >>> import io |
2326 |
| - >>> buffer = io.StringIO() |
2327 |
| - >>> df.info(buf=buffer) |
2328 |
| - >>> s = buffer.getvalue() |
2329 |
| - >>> with open("df_info.txt", "w", |
2330 |
| - ... encoding="utf-8") as f: # doctest: +SKIP |
2331 |
| - ... f.write(s) |
2332 |
| - 260 |
2333 |
| -
|
2334 |
| - The `memory_usage` parameter allows deep introspection mode, especially |
2335 |
| - useful for big DataFrames and fine-tune memory optimization: |
2336 |
| -
|
2337 |
| - >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) |
2338 |
| - >>> df = pd.DataFrame({ |
2339 |
| - ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), |
2340 |
| - ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), |
2341 |
| - ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) |
2342 |
| - ... }) |
2343 |
| - >>> df.info() |
2344 |
| - <class 'pandas.core.frame.DataFrame'> |
2345 |
| - RangeIndex: 1000000 entries, 0 to 999999 |
2346 |
| - Data columns (total 3 columns): |
2347 |
| - # Column Non-Null Count Dtype |
2348 |
| - --- ------ -------------- ----- |
2349 |
| - 0 column_1 1000000 non-null object |
2350 |
| - 1 column_2 1000000 non-null object |
2351 |
| - 2 column_3 1000000 non-null object |
2352 |
| - dtypes: object(3) |
2353 |
| - memory usage: 22.9+ MB |
2354 |
| -
|
2355 |
| - >>> df.info(memory_usage='deep') |
2356 |
| - <class 'pandas.core.frame.DataFrame'> |
2357 |
| - RangeIndex: 1000000 entries, 0 to 999999 |
2358 |
| - Data columns (total 3 columns): |
2359 |
| - # Column Non-Null Count Dtype |
2360 |
| - --- ------ -------------- ----- |
2361 |
| - 0 column_1 1000000 non-null object |
2362 |
| - 1 column_2 1000000 non-null object |
2363 |
| - 2 column_3 1000000 non-null object |
2364 |
| - dtypes: object(3) |
2365 |
| - memory usage: 188.8 MB |
2366 |
| - """ |
2367 |
| - if buf is None: # pragma: no cover |
2368 |
| - buf = sys.stdout |
2369 |
| - |
2370 |
| - lines = [] |
2371 |
| - |
2372 |
| - lines.append(str(type(self))) |
2373 |
| - lines.append(self.index._summary()) |
2374 |
| - |
2375 |
| - if len(self.columns) == 0: |
2376 |
| - lines.append(f"Empty {type(self).__name__}") |
2377 |
| - fmt.buffer_put_lines(buf, lines) |
2378 |
| - return |
2379 |
| - |
2380 |
| - cols = self.columns |
2381 |
| - col_count = len(self.columns) |
2382 |
| - |
2383 |
| - # hack |
2384 |
| - if max_cols is None: |
2385 |
| - max_cols = get_option("display.max_info_columns", len(self.columns) + 1) |
2386 |
| - |
2387 |
| - max_rows = get_option("display.max_info_rows", len(self) + 1) |
2388 |
| - |
2389 |
| - if null_counts is None: |
2390 |
| - show_counts = (col_count <= max_cols) and (len(self) < max_rows) |
2391 |
| - else: |
2392 |
| - show_counts = null_counts |
2393 |
| - exceeds_info_cols = col_count > max_cols |
2394 |
| - |
2395 |
| - def _verbose_repr(): |
2396 |
| - lines.append(f"Data columns (total {len(self.columns)} columns):") |
2397 |
| - |
2398 |
| - id_head = " # " |
2399 |
| - column_head = "Column" |
2400 |
| - col_space = 2 |
2401 |
| - |
2402 |
| - max_col = max(len(pprint_thing(k)) for k in cols) |
2403 |
| - len_column = len(pprint_thing(column_head)) |
2404 |
| - space = max(max_col, len_column) + col_space |
2405 |
| - |
2406 |
| - max_id = len(pprint_thing(col_count)) |
2407 |
| - len_id = len(pprint_thing(id_head)) |
2408 |
| - space_num = max(max_id, len_id) + col_space |
2409 |
| - counts = None |
2410 |
| - |
2411 |
| - header = _put_str(id_head, space_num) + _put_str(column_head, space) |
2412 |
| - if show_counts: |
2413 |
| - counts = self.count() |
2414 |
| - if len(cols) != len(counts): # pragma: no cover |
2415 |
| - raise AssertionError( |
2416 |
| - f"Columns must equal counts ({len(cols)} != {len(counts)})" |
2417 |
| - ) |
2418 |
| - count_header = "Non-Null Count" |
2419 |
| - len_count = len(count_header) |
2420 |
| - non_null = " non-null" |
2421 |
| - max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null) |
2422 |
| - space_count = max(len_count, max_count) + col_space |
2423 |
| - count_temp = "{count}" + non_null |
2424 |
| - else: |
2425 |
| - count_header = "" |
2426 |
| - space_count = len(count_header) |
2427 |
| - len_count = space_count |
2428 |
| - count_temp = "{count}" |
2429 |
| - |
2430 |
| - dtype_header = "Dtype" |
2431 |
| - len_dtype = len(dtype_header) |
2432 |
| - max_dtypes = max(len(pprint_thing(k)) for k in self.dtypes) |
2433 |
| - space_dtype = max(len_dtype, max_dtypes) |
2434 |
| - header += _put_str(count_header, space_count) + _put_str( |
2435 |
| - dtype_header, space_dtype |
2436 |
| - ) |
2437 |
| - |
2438 |
| - lines.append(header) |
2439 |
| - lines.append( |
2440 |
| - _put_str("-" * len_id, space_num) |
2441 |
| - + _put_str("-" * len_column, space) |
2442 |
| - + _put_str("-" * len_count, space_count) |
2443 |
| - + _put_str("-" * len_dtype, space_dtype) |
2444 |
| - ) |
2445 |
| - |
2446 |
| - for i, col in enumerate(self.columns): |
2447 |
| - dtype = self.dtypes.iloc[i] |
2448 |
| - col = pprint_thing(col) |
2449 |
| - |
2450 |
| - line_no = _put_str(f" {i}", space_num) |
2451 |
| - count = "" |
2452 |
| - if show_counts: |
2453 |
| - count = counts.iloc[i] |
2454 |
| - |
2455 |
| - lines.append( |
2456 |
| - line_no |
2457 |
| - + _put_str(col, space) |
2458 |
| - + _put_str(count_temp.format(count=count), space_count) |
2459 |
| - + _put_str(dtype, space_dtype) |
2460 |
| - ) |
2461 |
| - |
2462 |
| - def _non_verbose_repr(): |
2463 |
| - lines.append(self.columns._summary(name="Columns")) |
2464 |
| - |
2465 |
| - def _sizeof_fmt(num, size_qualifier): |
2466 |
| - # returns size in human readable format |
2467 |
| - for x in ["bytes", "KB", "MB", "GB", "TB"]: |
2468 |
| - if num < 1024.0: |
2469 |
| - return f"{num:3.1f}{size_qualifier} {x}" |
2470 |
| - num /= 1024.0 |
2471 |
| - return f"{num:3.1f}{size_qualifier} PB" |
2472 |
| - |
2473 |
| - if verbose: |
2474 |
| - _verbose_repr() |
2475 |
| - elif verbose is False: # specifically set to False, not necessarily None |
2476 |
| - _non_verbose_repr() |
2477 |
| - else: |
2478 |
| - if exceeds_info_cols: |
2479 |
| - _non_verbose_repr() |
2480 |
| - else: |
2481 |
| - _verbose_repr() |
2482 |
| - |
2483 |
| - counts = self._data.get_dtype_counts() |
2484 |
| - dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] |
2485 |
| - lines.append(f"dtypes: {', '.join(dtypes)}") |
2486 |
| - |
2487 |
| - if memory_usage is None: |
2488 |
| - memory_usage = get_option("display.memory_usage") |
2489 |
| - if memory_usage: |
2490 |
| - # append memory usage of df to display |
2491 |
| - size_qualifier = "" |
2492 |
| - if memory_usage == "deep": |
2493 |
| - deep = True |
2494 |
| - else: |
2495 |
| - # size_qualifier is just a best effort; not guaranteed to catch |
2496 |
| - # all cases (e.g., it misses categorical data even with object |
2497 |
| - # categories) |
2498 |
| - deep = False |
2499 |
| - if "object" in counts or self.index._is_memory_usage_qualified(): |
2500 |
| - size_qualifier = "+" |
2501 |
| - mem_usage = self.memory_usage(index=True, deep=deep).sum() |
2502 |
| - lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") |
2503 |
| - fmt.buffer_put_lines(buf, lines) |
| 2229 | + ): |
| 2230 | + return info(self, verbose, buf, max_cols, memory_usage, null_counts) |
2504 | 2231 |
|
2505 | 2232 | def memory_usage(self, index=True, deep=False) -> Series:
|
2506 | 2233 | """
|
@@ -8590,7 +8317,3 @@ def _from_nested_dict(data):
|
8590 | 8317 | new_data[col] = new_data.get(col, {})
|
8591 | 8318 | new_data[col][index] = v
|
8592 | 8319 | return new_data
|
8593 |
| - |
8594 |
| - |
8595 |
| -def _put_str(s, space): |
8596 |
| - return str(s)[:space].ljust(space) |
0 commit comments