From 1708d7966e6d99106b5c27e777779853d3d15f81 Mon Sep 17 00:00:00 2001 From: John Tran Date: Fri, 9 Aug 2024 19:11:56 -0700 Subject: [PATCH 1/2] DOC: Clarify nrows behavior in read_csv --- pandas/io/parsers/readers.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 0cca1ebdb8c8f..5cc511c4b28a3 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -268,6 +268,18 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): Number of lines at bottom of file to skip (Unsupported with ``engine='c'``). nrows : int, optional Number of rows of file to read. Useful for reading pieces of large files. + Refers to the number of data rows in the returned DataFrame, excluding: + + * The header row containing column names. + * Rows before the header row, if ``header=1`` or larger. + + Example usage: + + * To read the first 999,999 (non-header) rows: + ``read_csv(..., nrows=999999)`` + + * To read rows 1,000,000 through 1,999,999: + ``read_csv(..., skiprows=range(1, 1000000), nrows=1000000)`` na_values : Hashable, Iterable of Hashable or dict of {{Hashable : Iterable}}, optional Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific per-column ``NA`` values. By default the following values are interpreted as From 187157b640dc756cf37811539a35939eaf9eb2fb Mon Sep 17 00:00:00 2001 From: John Tran Date: Fri, 9 Aug 2024 19:57:59 -0700 Subject: [PATCH 2/2] Remove whitespace from a blank line --- pandas/io/parsers/readers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 5cc511c4b28a3..6a287a42d7091 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -274,10 +274,10 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): * Rows before the header row, if ``header=1`` or larger. 
Example usage: - + * To read the first 999,999 (non-header) rows: ``read_csv(..., nrows=999999)`` - + * To read rows 1,000,000 through 1,999,999: ``read_csv(..., skiprows=range(1, 1000000), nrows=1000000)`` na_values : Hashable, Iterable of Hashable or dict of {{Hashable : Iterable}}, optional