DOC: Clarify nrows behavior in read_csv (#59467)

johnyu013 · web-flow · commit 9c776ae0e5d9 · 2024-08-12T10:36:08.000-07:00
* DOC: Clarify nrows behavior in read_csv

* Remove whitespace from a blank line
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -268,6 +268,18 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
     Number of lines at bottom of file to skip (Unsupported with ``engine='c'``).
 nrows : int, optional
     Number of rows of file to read. Useful for reading pieces of large files.
+    Refers to the number of data rows in the returned DataFrame, excluding:
+
+    * The header row containing column names.
+    * Rows before the header row, if ``header=1`` or larger.
+
+    Example usage:
+
+    * To read the first 999,999 (non-header) rows:
+      ``read_csv(..., nrows=999999)``
+
+    * To read data rows 1,000,000 through 1,999,999 (keeping the header row):
+      ``read_csv(..., skiprows=range(1, 1000000), nrows=1000000)``
 na_values : Hashable, Iterable of Hashable or dict of {{Hashable : Iterable}}, optional
     Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific
     per-column ``NA`` values.  By default the following values are interpreted as