From 83fb858bc265997e82cb60b5a1dd2795ce18e381 Mon Sep 17 00:00:00 2001 From: Matthew Davis Date: Mon, 24 Jun 2024 20:39:27 +0200 Subject: [PATCH 1/2] DOC: Clarify interaction of read_csv nrows with other args (#59078) --- pandas/io/parsers/readers.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index d00fc3b15976c..b7c44163b439b 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -268,7 +268,17 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): skipfooter : int, default 0 Number of lines at bottom of file to skip (Unsupported with ``engine='c'``). nrows : int, optional - Number of rows of file to read. Useful for reading pieces of large files. + Number of rows of data to read. Useful for reading pieces of large files. + The following rows are excluded from the count: + + * The header row containing column names, + * Rows before the header row, if ``header=1`` or larger, + * Fully commented rows, + * Rows skipped with ``skiprows``, + * Skipped blank lines. + + Records containing text with newline characters within quotes + or escaped newline characters will be counted as one row. na_values : Hashable, Iterable of Hashable or dict of {{Hashable : Iterable}}, optional Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific per-column ``NA`` values. By default the following values are interpreted as From c03c1d1f5b3049b5306b6a6c504a52f4d334b9ea Mon Sep 17 00:00:00 2001 From: Matthew Davis Date: Wed, 26 Jun 2024 19:34:12 +0200 Subject: [PATCH 2/2] DOCS: Simplify explanation of read_csv nrows --- pandas/io/parsers/readers.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index b7c44163b439b..d6bb29b055b91 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -268,17 +268,7 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): skipfooter : int, default 0 Number of lines at bottom of file to skip (Unsupported with ``engine='c'``). nrows : int, optional - Number of rows of data to read. Useful for reading pieces of large files. - The following rows are excluded from the count: - - * The header row containing column names, - * Rows before the header row, if ``header=1`` or larger, - * Fully commented rows, - * Rows skipped with ``skiprows``, - * Skipped blank lines. - - Records containing text with newline characters within quotes - or escaped newline characters will be counted as one row. + Maximum number of rows of data to return (excluding the column header). Useful for reading pieces of large files. na_values : Hashable, Iterable of Hashable or dict of {{Hashable : Iterable}}, optional Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific per-column ``NA`` values. By default the following values are interpreted as