Skip to content

REF/CLN: pandas/io/parsers.py #36852

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Oct 14, 2020
129 changes: 64 additions & 65 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,71 +867,23 @@ def __init__(self, f, engine=None, **kwds):
else:
engine = "python"
engine_specified = False

self.engine = engine
self._engine_specified = kwds.get("engine_specified", engine_specified)

if kwds.get("dialect") is not None:
dialect = kwds["dialect"]
if dialect in csv.list_dialects():
dialect = csv.get_dialect(dialect)
kwds = self._refresh_kwargs_based_on_dialect(kwds, dialect)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merge master, as some of this has been updated, and rename this to something like check_defaults_dialect (or similar).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.
I renamed the function to _merge_with_dialect_properties, as it actually merges kwargs with those provided by the dialect (and resolves some conflicts).


# Any valid dialect should have these attributes.
# If any are missing, we will raise automatically.
for param in (
"delimiter",
"doublequote",
"escapechar",
"skipinitialspace",
"quotechar",
"quoting",
):
try:
dialect_val = getattr(dialect, param)
except AttributeError as err:
raise ValueError(
f"Invalid dialect {kwds['dialect']} provided"
) from err
parser_default = _parser_defaults[param]
provided = kwds.get(param, parser_default)

# Messages for conflicting values between the dialect
# instance and the actual parameters provided.
conflict_msgs = []

# Don't warn if the default parameter was passed in,
# even if it conflicts with the dialect (gh-23761).
if provided != parser_default and provided != dialect_val:
msg = (
f"Conflicting values for '{param}': '{provided}' was "
f"provided, but the dialect specifies '{dialect_val}'. "
"Using the dialect-specified value."
)

# Annoying corner case for not warning about
# conflicts between dialect and delimiter parameter.
# Refer to the outer "_read_" function for more info.
if not (param == "delimiter" and kwds.pop("sep_override", False)):
conflict_msgs.append(msg)

if conflict_msgs:
warnings.warn(
"\n\n".join(conflict_msgs), ParserWarning, stacklevel=2
)
kwds[param] = dialect_val

if kwds.get("skipfooter"):
if kwds.get("iterator") or kwds.get("chunksize"):
raise ValueError("'skipfooter' not supported for 'iteration'")
if kwds.get("nrows"):
raise ValueError("'skipfooter' not supported with 'nrows'")
self._validate_skipfooter(kwds)

if kwds.get("header", "infer") == "infer":
kwds["header"] = 0 if kwds.get("names") is None else None

self.orig_options = kwds

# miscellanea
self.engine = engine
self._currow = 0

options = self._get_options_with_defaults(engine)
Expand All @@ -951,6 +903,61 @@ def __init__(self, f, engine=None, **kwds):
def close(self):
self._engine.close()

@staticmethod
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't use staticmethods

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to module level

def _refresh_kwargs_based_on_dialect(kwds, dialect):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add type annotations and a docstring?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

"""Update kwargs based on the dialect parameters."""
# Any valid dialect should have these attributes.
# If any are missing, we will raise automatically.
mandatory_dialect_attrs = (
"delimiter",
"doublequote",
"escapechar",
"skipinitialspace",
"quotechar",
"quoting",
)

for param in mandatory_dialect_attrs:
try:
dialect_val = getattr(dialect, param)
except AttributeError as err:
raise ValueError(f"Invalid dialect {kwds['dialect']} provided") from err

parser_default = _parser_defaults[param]
provided = kwds.get(param, parser_default)

# Messages for conflicting values between the dialect
# instance and the actual parameters provided.
conflict_msgs = []

# Don't warn if the default parameter was passed in,
# even if it conflicts with the dialect (gh-23761).
if provided != parser_default and provided != dialect_val:
msg = (
f"Conflicting values for '{param}': '{provided}' was "
f"provided, but the dialect specifies '{dialect_val}'. "
"Using the dialect-specified value."
)

# Annoying corner case for not warning about
# conflicts between dialect and delimiter parameter.
# Refer to the outer "_read_" function for more info.
if not (param == "delimiter" and kwds.pop("sep_override", False)):
conflict_msgs.append(msg)

if conflict_msgs:
warnings.warn("\n\n".join(conflict_msgs), ParserWarning, stacklevel=2)
kwds[param] = dialect_val
return kwds

@staticmethod
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: don't use a staticmethod.

def _validate_skipfooter(kwds):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add type annotations?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

if kwds.get("skipfooter"):
if kwds.get("iterator") or kwds.get("chunksize"):
raise ValueError("'skipfooter' not supported for 'iteration'")
if kwds.get("nrows"):
raise ValueError("'skipfooter' not supported with 'nrows'")

def _get_options_with_defaults(self, engine):
kwds = self.orig_options

Expand Down Expand Up @@ -1006,7 +1013,6 @@ def _check_file_or_buffer(self, f, engine):
def _clean_options(self, options, engine):
result = options.copy()

engine_specified = self._engine_specified
fallback_reason = None

# C engine not supported yet
Expand Down Expand Up @@ -1070,7 +1076,7 @@ def _clean_options(self, options, engine):
)
engine = "python"

if fallback_reason and engine_specified:
if fallback_reason and self._engine_specified:
raise ValueError(fallback_reason)

if engine == "c":
Expand Down Expand Up @@ -1106,25 +1112,18 @@ def _clean_options(self, options, engine):

validate_header_arg(options["header"])

depr_warning = ""

for arg in _deprecated_args:
parser_default = _c_parser_defaults[arg]
depr_default = _deprecated_defaults[arg]

msg = (
f"The {repr(arg)} argument has been deprecated and will be "
"removed in a future version."
)

if result.get(arg, depr_default) != depr_default:
depr_warning += msg + "\n\n"
msg = (
f"The {repr(arg)} argument has been deprecated and will be "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't repr here redundant?

"removed in a future version.\n\n"
)
warnings.warn(msg, FutureWarning, stacklevel=2)
else:
result[arg] = parser_default

if depr_warning != "":
warnings.warn(depr_warning, FutureWarning, stacklevel=2)

if index_col is True:
raise ValueError("The value of index_col couldn't be 'True'")
if _is_index_col(index_col):
Expand Down