-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF/CLN: pandas/io/parsers.py #36852
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
0a87ba4
63e625d
7ac3b6d
8dc74fd
06fcf4c
da7a9b2
4c024da
f10984f
19f6533
c8e0bc9
fc1b627
6e7c0d0
ba4a389
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -867,71 +867,23 @@ def __init__(self, f, engine=None, **kwds): | |
else: | ||
engine = "python" | ||
engine_specified = False | ||
|
||
self.engine = engine | ||
self._engine_specified = kwds.get("engine_specified", engine_specified) | ||
|
||
if kwds.get("dialect") is not None: | ||
dialect = kwds["dialect"] | ||
if dialect in csv.list_dialects(): | ||
dialect = csv.get_dialect(dialect) | ||
kwds = self._refresh_kwargs_based_on_dialect(kwds, dialect) | ||
|
||
# Any valid dialect should have these attributes. | ||
# If any are missing, we will raise automatically. | ||
for param in ( | ||
"delimiter", | ||
"doublequote", | ||
"escapechar", | ||
"skipinitialspace", | ||
"quotechar", | ||
"quoting", | ||
): | ||
try: | ||
dialect_val = getattr(dialect, param) | ||
except AttributeError as err: | ||
raise ValueError( | ||
f"Invalid dialect {kwds['dialect']} provided" | ||
) from err | ||
parser_default = _parser_defaults[param] | ||
provided = kwds.get(param, parser_default) | ||
|
||
# Messages for conflicting values between the dialect | ||
# instance and the actual parameters provided. | ||
conflict_msgs = [] | ||
|
||
# Don't warn if the default parameter was passed in, | ||
# even if it conflicts with the dialect (gh-23761). | ||
if provided != parser_default and provided != dialect_val: | ||
msg = ( | ||
f"Conflicting values for '{param}': '{provided}' was " | ||
f"provided, but the dialect specifies '{dialect_val}'. " | ||
"Using the dialect-specified value." | ||
) | ||
|
||
# Annoying corner case for not warning about | ||
# conflicts between dialect and delimiter parameter. | ||
# Refer to the outer "_read_" function for more info. | ||
if not (param == "delimiter" and kwds.pop("sep_override", False)): | ||
conflict_msgs.append(msg) | ||
|
||
if conflict_msgs: | ||
warnings.warn( | ||
"\n\n".join(conflict_msgs), ParserWarning, stacklevel=2 | ||
) | ||
kwds[param] = dialect_val | ||
|
||
if kwds.get("skipfooter"): | ||
if kwds.get("iterator") or kwds.get("chunksize"): | ||
raise ValueError("'skipfooter' not supported for 'iteration'") | ||
if kwds.get("nrows"): | ||
raise ValueError("'skipfooter' not supported with 'nrows'") | ||
self._validate_skipfooter(kwds) | ||
|
||
if kwds.get("header", "infer") == "infer": | ||
kwds["header"] = 0 if kwds.get("names") is None else None | ||
|
||
self.orig_options = kwds | ||
|
||
# miscellanea | ||
self.engine = engine | ||
self._currow = 0 | ||
|
||
options = self._get_options_with_defaults(engine) | ||
|
@@ -951,6 +903,61 @@ def __init__(self, f, engine=None, **kwds): | |
def close(self): | ||
self._engine.close() | ||
|
||
@staticmethod | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't use staticmethods. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Moved to module level. |
||
def _refresh_kwargs_based_on_dialect(kwds, dialect): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add type annotations and a docstring? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
"""Update kwargs based on the dialect parameters.""" | ||
# Any valid dialect should have these attributes. | ||
# If any are missing, we will raise automatically. | ||
mandatory_dialect_attrs = ( | ||
"delimiter", | ||
"doublequote", | ||
"escapechar", | ||
"skipinitialspace", | ||
"quotechar", | ||
"quoting", | ||
) | ||
|
||
for param in mandatory_dialect_attrs: | ||
try: | ||
dialect_val = getattr(dialect, param) | ||
except AttributeError as err: | ||
raise ValueError(f"Invalid dialect {kwds['dialect']} provided") from err | ||
|
||
parser_default = _parser_defaults[param] | ||
provided = kwds.get(param, parser_default) | ||
|
||
# Messages for conflicting values between the dialect | ||
# instance and the actual parameters provided. | ||
conflict_msgs = [] | ||
|
||
# Don't warn if the default parameter was passed in, | ||
# even if it conflicts with the dialect (gh-23761). | ||
if provided != parser_default and provided != dialect_val: | ||
msg = ( | ||
f"Conflicting values for '{param}': '{provided}' was " | ||
f"provided, but the dialect specifies '{dialect_val}'. " | ||
"Using the dialect-specified value." | ||
) | ||
|
||
# Annoying corner case for not warning about | ||
# conflicts between dialect and delimiter parameter. | ||
# Refer to the outer "_read_" function for more info. | ||
if not (param == "delimiter" and kwds.pop("sep_override", False)): | ||
conflict_msgs.append(msg) | ||
|
||
if conflict_msgs: | ||
warnings.warn("\n\n".join(conflict_msgs), ParserWarning, stacklevel=2) | ||
kwds[param] = dialect_val | ||
return kwds | ||
|
||
@staticmethod | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here. |
||
def _validate_skipfooter(kwds): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add type annotations? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
if kwds.get("skipfooter"): | ||
if kwds.get("iterator") or kwds.get("chunksize"): | ||
raise ValueError("'skipfooter' not supported for 'iteration'") | ||
if kwds.get("nrows"): | ||
raise ValueError("'skipfooter' not supported with 'nrows'") | ||
|
||
def _get_options_with_defaults(self, engine): | ||
kwds = self.orig_options | ||
|
||
|
@@ -1006,7 +1013,6 @@ def _check_file_or_buffer(self, f, engine): | |
def _clean_options(self, options, engine): | ||
result = options.copy() | ||
|
||
engine_specified = self._engine_specified | ||
fallback_reason = None | ||
|
||
# C engine not supported yet | ||
|
@@ -1070,7 +1076,7 @@ def _clean_options(self, options, engine): | |
) | ||
engine = "python" | ||
|
||
if fallback_reason and engine_specified: | ||
if fallback_reason and self._engine_specified: | ||
raise ValueError(fallback_reason) | ||
|
||
if engine == "c": | ||
|
@@ -1106,25 +1112,18 @@ def _clean_options(self, options, engine): | |
|
||
validate_header_arg(options["header"]) | ||
|
||
depr_warning = "" | ||
|
||
for arg in _deprecated_args: | ||
parser_default = _c_parser_defaults[arg] | ||
depr_default = _deprecated_defaults[arg] | ||
|
||
msg = ( | ||
f"The {repr(arg)} argument has been deprecated and will be " | ||
"removed in a future version." | ||
) | ||
|
||
if result.get(arg, depr_default) != depr_default: | ||
depr_warning += msg + "\n\n" | ||
msg = ( | ||
f"The {repr(arg)} argument has been deprecated and will be " | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. isn't |
||
"removed in a future version.\n\n" | ||
) | ||
warnings.warn(msg, FutureWarning, stacklevel=2) | ||
else: | ||
result[arg] = parser_default | ||
|
||
if depr_warning != "": | ||
warnings.warn(depr_warning, FutureWarning, stacklevel=2) | ||
|
||
if index_col is True: | ||
raise ValueError("The value of index_col couldn't be 'True'") | ||
if _is_index_col(index_col): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Merge master, as some of this has been updated, and rename this to something like check_defaults_dialect (or similar).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
I renamed the function to
_merge_with_dialect_properties
as it actually merges the kwargs with those provided by the dialect (and resolves some conflicts).