-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
DEPR: error_bad_lines and warn_bad_lines for read_csv #40413
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
4e867e9
d230035
ce5bf29
8629f87
06f87a1
edeef7e
f806a4d
5b08a88
af3fd15
f70f34e
0c76180
a0406b5
89fdc70
f7265a3
1e20b53
2e79f9a
fe7541c
772c13f
d00e601
e267aa4
e724d0b
fdef68e
a220293
9b8468a
a6af9aa
2f70edc
93c37df
cf3201c
4911b27
f28316b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
from collections import defaultdict | ||
import csv | ||
import datetime | ||
from enum import Enum | ||
import itertools | ||
from typing import ( | ||
Any, | ||
|
@@ -114,6 +115,11 @@ | |
|
||
|
||
class ParserBase: | ||
class BadLineHandleMethod(Enum): | ||
ERROR = 0 | ||
WARN = 1 | ||
SKIP = 2 | ||
|
||
def __init__(self, kwds): | ||
|
||
self.names = kwds.get("names") | ||
|
@@ -202,6 +208,25 @@ def __init__(self, kwds): | |
|
||
self.handles: Optional[IOHandles] = None | ||
|
||
# Bad line handling | ||
on_bad_lines = kwds.get("on_bad_lines") | ||
if on_bad_lines is not None: | ||
if on_bad_lines == "error": | ||
self.on_bad_lines = self.BadLineHandleMethod.ERROR | ||
elif on_bad_lines == "warn": | ||
self.on_bad_lines = self.BadLineHandleMethod.WARN | ||
elif on_bad_lines == "skip": | ||
self.on_bad_lines = self.BadLineHandleMethod.SKIP | ||
else: | ||
raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines") | ||
else: | ||
if kwds.get("error_bad_lines"): | ||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these need a deprecation warning There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. _deprecated_defaults handles this for us. |
||
self.on_bad_lines = self.BadLineHandleMethod.ERROR | ||
elif kwds.get("warn_bad_lines"): | ||
self.on_bad_lines = self.BadLineHandleMethod.WARN | ||
else: | ||
self.on_bad_lines = self.BadLineHandleMethod.SKIP | ||
|
||
def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None: | ||
""" | ||
Let the readers open IOHandles after they are done with their potential raises. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -325,9 +325,38 @@ | |
default cause an exception to be raised, and no DataFrame will be returned. | ||
If False, then these "bad lines" will be dropped from the DataFrame that is | ||
returned. | ||
|
||
.. deprecated:: 1.3 | ||
The ``on_bad_lines`` parameter takes precedence over this parameter | ||
when specified and should be used instead to specify behavior upon | ||
encountering a bad line. | ||
warn_bad_lines : bool, default True | ||
If error_bad_lines is False, and warn_bad_lines is True, a warning for each | ||
"bad line" will be output. | ||
|
||
.. deprecated:: 1.3 | ||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
The ``on_bad_lines`` parameter takes precedence over this parameter | ||
when specified and should be used instead to specify behavior upon | ||
encountering a bad line. | ||
on_bad_lines : {{None, 'error', 'warn', 'skip'}}, default ``None`` | ||
Specifies what to do upon encountering a bad line (a line with too many fields). | ||
Allowed values are: | ||
|
||
- ``None``, default option, defer to ``error_bad_lines`` and ``warn_bad_lines``. | ||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Note: This option is only present for backwards-compatibility reasons and will | ||
be removed after the removal of ``error_bad_lines`` and ``warn_bad_lines``. | ||
Please do not specify it explicitly. | ||
|
||
- 'error', raise an Exception when a bad line is encountered. | ||
- 'warn', raise a warning when a bad line is encountered and skip that line. | ||
- 'skip', skip bad lines without raising or warning when they are encountered. | ||
|
||
This parameter takes precedence over parameters | ||
``error_bad_lines`` and ``warn_bad_lines`` if specified. | ||
|
||
.. versionadded:: 1.3 | ||
|
||
delim_whitespace : bool, default False | ||
Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be | ||
used as the sep. Equivalent to setting ``sep='\\s+'``. If this option | ||
|
@@ -382,6 +411,7 @@ | |
"memory_map": False, | ||
"error_bad_lines": True, | ||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"warn_bad_lines": True, | ||
"on_bad_lines": None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah should remove error/warn_bad_lines from here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. _get_options_with_defaults is really spaghetti-fied right now, so removing this would cause the args not to be passed to the parser. I will try to clean up _get_options_with_defaults in a future PR if I have time. |
||
"float_precision": None, | ||
} | ||
|
||
|
@@ -390,8 +420,8 @@ | |
_c_unsupported = {"skipfooter"} | ||
_python_unsupported = {"low_memory", "float_precision"} | ||
|
||
_deprecated_defaults: Dict[str, Any] = {} | ||
_deprecated_args: Set[str] = set() | ||
_deprecated_defaults: Dict[str, Any] = {"error_bad_lines": True, "warn_bad_lines": True} | ||
_deprecated_args: Set[str] = {"error_bad_lines", "warn_bad_lines"} | ||
|
||
|
||
def validate_integer(name, val, min_val=0): | ||
|
@@ -533,6 +563,8 @@ def read_csv( | |
# Error Handling | ||
error_bad_lines=True, | ||
warn_bad_lines=True, | ||
# TODO: disallow and change None to 'error' in on_bad_lines in 2.0 | ||
on_bad_lines=None, | ||
# Internal | ||
delim_whitespace=False, | ||
low_memory=_c_parser_defaults["low_memory"], | ||
|
@@ -613,6 +645,8 @@ def read_table( | |
# Error Handling | ||
error_bad_lines=True, | ||
warn_bad_lines=True, | ||
# TODO: disallow and change None to 'error' in on_bad_lines in 2.0 | ||
on_bad_lines=None, | ||
encoding_errors: Optional[str] = "strict", | ||
# Internal | ||
delim_whitespace=False, | ||
|
@@ -924,7 +958,7 @@ def _clean_options(self, options, engine): | |
f"The {arg} argument has been deprecated and will be " | ||
"removed in a future version.\n\n" | ||
) | ||
warnings.warn(msg, FutureWarning, stacklevel=2) | ||
warnings.warn(msg, FutureWarning, stacklevel=6) | ||
else: | ||
result[arg] = parser_default | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.