From c8c5958de8793ad4b7e5dc9973c1a3f9bb2aa8c2 Mon Sep 17 00:00:00 2001 From: William Andrea <22385371+wjandrea@users.noreply.github.com> Date: Thu, 9 May 2024 17:45:49 -0400 Subject: [PATCH 1/3] Clarify allowed values for on_bad_lines in read_csv Move the callable options out of the version added/changed tags and improve the flow. --- pandas/io/parsers/readers.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 648e5108df77a..fe8db43c33e3e 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -413,25 +413,27 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): - ``'error'``, raise an Exception when a bad line is encountered. - ``'warn'``, raise a warning when a bad line is encountered and skip that line. - ``'skip'``, skip bad lines without raising or warning when they are encountered. - + - Callable, function that will process a single bad line. + - With ``engine='python'``, function with signature + ``(bad_line: list[str]) -> list[str] | None``. + ``bad_line`` is a list of strings split by the ``sep``. + If the function returns ``None``, the bad line will be ignored. + If the function returns a new ``list`` of strings with more elements than + expected, a ``ParserWarning`` will be emitted while dropping extra elements. + - With ``engine='pyarrow'``, function with signature + as described in `pyarrow documentation + <https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html + #pyarrow.csv.ParseOptions.invalid_row_handler>`_. + .. versionadded:: 1.3.0 .. versionadded:: 1.4.0 - - Callable, function with signature - ``(bad_line: list[str]) -> list[str] | None`` that will process a single - bad line. ``bad_line`` is a list of strings split by the ``sep``. - If the function returns ``None``, the bad line will be ignored. - If the function returns a new ``list`` of strings with more elements than - expected, a ``ParserWarning`` will be emitted while dropping extra elements. 
- Only supported when ``engine='python'`` + Callable .. versionchanged:: 2.2.0 - - Callable, function with signature - as described in `pyarrow documentation - <https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html - #pyarrow.csv.ParseOptions.invalid_row_handler>`_ when ``engine='pyarrow'`` + Callable for ``engine='pyarrow'`` delim_whitespace : bool, default False Specifies whether or not whitespace (e.g. ``' '`` or ``'\\t'``) will be From c3b6c9e9c54c4648158d9fd475fe1152dd88bfa1 Mon Sep 17 00:00:00 2001 From: William Andrea <22385371+wjandrea@users.noreply.github.com> Date: Thu, 9 May 2024 18:01:39 -0400 Subject: [PATCH 2/3] typo space before colon --- pandas/io/parsers/readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index fe8db43c33e3e..fe3a6d2da6f51 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -408,7 +408,7 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): documentation for more details. on_bad_lines : {{'error', 'warn', 'skip'}} or Callable, default 'error' Specifies what to do upon encountering a bad line (a line with too many fields). - Allowed values are : + Allowed values are: - ``'error'``, raise an Exception when a bad line is encountered. - ``'warn'``, raise a warning when a bad line is encountered and skip that line. From 614ffd1f055a877afc7875619ec118fb4402da36 Mon Sep 17 00:00:00 2001 From: William Andrea <22385371+wjandrea@users.noreply.github.com> Date: Fri, 10 May 2024 12:15:47 -0400 Subject: [PATCH 3/3] trim trailing whitespace --- pandas/io/parsers/readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index fe3a6d2da6f51..c2a65bfaac50b 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -424,7 +424,7 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): as described in `pyarrow documentation <https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html #pyarrow.csv.ParseOptions.invalid_row_handler>`_. - + .. versionadded:: 1.3.0 .. versionadded:: 1.4.0