From 0a87ba44d487aec20de03d8465c2dc649f8fc57c Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 4 Oct 2020 13:28:30 +0700 Subject: [PATCH 01/11] REF: ext method _refresh_kwargs_based_on_dialect --- pandas/io/parsers.py | 100 ++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index dd3588faedf7a..feb6cd3f7226b 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -874,56 +874,8 @@ def __init__(self, f, engine=None, **kwds): dialect = kwds["dialect"] if dialect in csv.list_dialects(): dialect = csv.get_dialect(dialect) + kwds = self._refresh_kwargs_based_on_dialect(kwds, dialect) - # Any valid dialect should have these attributes. - # If any are missing, we will raise automatically. - for param in ( - "delimiter", - "doublequote", - "escapechar", - "skipinitialspace", - "quotechar", - "quoting", - ): - try: - dialect_val = getattr(dialect, param) - except AttributeError as err: - raise ValueError( - f"Invalid dialect {kwds['dialect']} provided" - ) from err - parser_default = _parser_defaults[param] - provided = kwds.get(param, parser_default) - - # Messages for conflicting values between the dialect - # instance and the actual parameters provided. - conflict_msgs = [] - - # Don't warn if the default parameter was passed in, - # even if it conflicts with the dialect (gh-23761). - if provided != parser_default and provided != dialect_val: - msg = ( - f"Conflicting values for '{param}': '{provided}' was " - f"provided, but the dialect specifies '{dialect_val}'. " - "Using the dialect-specified value." - ) - - # Annoying corner case for not warning about - # conflicts between dialect and delimiter parameter. - # Refer to the outer "_read_" function for more info. - if not (param == "delimiter" and kwds.pop("sep_override", False)): - conflict_msgs.append(msg) - - if conflict_msgs: - warnings.warn( - "\n\n".join(conflict_msgs), ParserWarning, stacklevel=2 - ) - kwds[param] = dialect_val - - if kwds.get("skipfooter"): - if kwds.get("iterator") or kwds.get("chunksize"): - raise ValueError("'skipfooter' not supported for 'iteration'") - if kwds.get("nrows"): - raise ValueError("'skipfooter' not supported with 'nrows'") if kwds.get("header", "infer") == "infer": kwds["header"] = 0 if kwds.get("names") is None else None @@ -951,6 +903,56 @@ def __init__(self, f, engine=None, **kwds): def close(self): self._engine.close() + def _refresh_kwargs_based_on_dialect(self, kwds, dialect): + """Update kwargs based on the dialect parameters.""" + # Any valid dialect should have these attributes. + # If any are missing, we will raise automatically. + mandatory_dialect_attrs = ( + "delimiter", + "doublequote", + "escapechar", + "skipinitialspace", + "quotechar", + "quoting", + ) + + for param in mandatory_dialect_attrs: + try: + dialect_val = getattr(dialect, param) + except AttributeError as err: + raise ValueError( + f"Invalid dialect {kwds['dialect']} provided" + ) from err + + parser_default = _parser_defaults[param] + provided = kwds.get(param, parser_default) + + # Messages for conflicting values between the dialect + # instance and the actual parameters provided. + conflict_msgs = [] + + # Don't warn if the default parameter was passed in, + # even if it conflicts with the dialect (gh-23761). + if provided != parser_default and provided != dialect_val: + msg = ( + f"Conflicting values for '{param}': '{provided}' was " + f"provided, but the dialect specifies '{dialect_val}'. " + "Using the dialect-specified value." + ) + + # Annoying corner case for not warning about + # conflicts between dialect and delimiter parameter. + # Refer to the outer "_read_" function for more info. + if not (param == "delimiter" and kwds.pop("sep_override", False)): + conflict_msgs.append(msg) + + if conflict_msgs: + warnings.warn( + "\n\n".join(conflict_msgs), ParserWarning, stacklevel=2 + ) + kwds[param] = dialect_val + return kwds + def _get_options_with_defaults(self, engine): kwds = self.orig_options From 63e625d2e71727bf673acfc7120ee0e6dc4c437c Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 4 Oct 2020 13:29:00 +0700 Subject: [PATCH 02/11] REF: extract method _validate_skipfooter --- pandas/io/parsers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index feb6cd3f7226b..8e8c6d9ac1cfa 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -876,6 +876,7 @@ def __init__(self, f, engine=None, **kwds): dialect = csv.get_dialect(dialect) kwds = self._refresh_kwargs_based_on_dialect(kwds, dialect) + self._validate_skipfooter(kwds) if kwds.get("header", "infer") == "infer": kwds["header"] = 0 if kwds.get("names") is None else None @@ -953,6 +954,13 @@ def _refresh_kwargs_based_on_dialect(self, kwds, dialect): kwds[param] = dialect_val return kwds + def _validate_skipfooter(self, kwds): + if kwds.get("skipfooter"): + if kwds.get("iterator") or kwds.get("chunksize"): + raise ValueError("'skipfooter' not supported for 'iteration'") + if kwds.get("nrows"): + raise ValueError("'skipfooter' not supported with 'nrows'") + def _get_options_with_defaults(self, engine): kwds = self.orig_options From 7ac3b6d2665537682827eab45138c17d82d9d805 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 4 Oct 2020 14:02:13 +0700 Subject: [PATCH 03/11] REF: drop local variable engine_specified --- pandas/io/parsers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8e8c6d9ac1cfa..f93bdbc77ff1d 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -867,7 +867,7 @@ def __init__(self, f, engine=None, **kwds): else: engine = "python" engine_specified = False - + self.engine = engine self._engine_specified = kwds.get("engine_specified", engine_specified) if kwds.get("dialect") is not None: @@ -884,7 +884,6 @@ def __init__(self, f, engine=None, **kwds): self.orig_options = kwds # miscellanea - self.engine = engine self._currow = 0 options = self._get_options_with_defaults(engine) @@ -1016,7 +1015,6 @@ def _check_file_or_buffer(self, f, engine): def _clean_options(self, options, engine): result = options.copy() - engine_specified = self._engine_specified fallback_reason = None # C engine not supported yet @@ -1080,7 +1078,7 @@ def _clean_options(self, options, engine): ) engine = "python" - if fallback_reason and engine_specified: + if fallback_reason and self._engine_specified: raise ValueError(fallback_reason) if engine == "c": From 8dc74fdc84a5f2734f4bc3b9dcc37c8ce3f7d6fd Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 4 Oct 2020 14:08:51 +0700 Subject: [PATCH 04/11] CLN: clean FutureWarning issue --- pandas/io/parsers.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f93bdbc77ff1d..08055a8f507b5 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1114,25 +1114,18 @@ def _clean_options(self, options, engine): validate_header_arg(options["header"]) - depr_warning = "" - for arg in _deprecated_args: parser_default = _c_parser_defaults[arg] depr_default = _deprecated_defaults[arg] - - msg = ( - f"The {repr(arg)} argument has been deprecated and will be " - "removed in a future version." - ) - if result.get(arg, depr_default) != depr_default: - depr_warning += msg + "\n\n" + msg = ( + f"The {repr(arg)} argument has been deprecated and will be " + "removed in a future version.\n\n" + ) + warnings.warn(msg, FutureWarning, stacklevel=2) else: result[arg] = parser_default - if depr_warning != "": - warnings.warn(depr_warning, FutureWarning, stacklevel=2) - if index_col is True: raise ValueError("The value of index_col couldn't be 'True'") if _is_index_col(index_col): From 06fcf4c81868429c2eaf19e6524a433a1b67bf66 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 4 Oct 2020 14:27:01 +0700 Subject: [PATCH 05/11] CLN: make validation methods as static --- pandas/io/parsers.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 08055a8f507b5..80f33a0c6c8aa 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -903,7 +903,8 @@ def __init__(self, f, engine=None, **kwds): def close(self): self._engine.close() - def _refresh_kwargs_based_on_dialect(self, kwds, dialect): + @staticmethod + def _refresh_kwargs_based_on_dialect(kwds, dialect): """Update kwargs based on the dialect parameters.""" # Any valid dialect should have these attributes. # If any are missing, we will raise automatically. @@ -920,9 +921,7 @@ def _refresh_kwargs_based_on_dialect(self, kwds, dialect): try: dialect_val = getattr(dialect, param) except AttributeError as err: - raise ValueError( - f"Invalid dialect {kwds['dialect']} provided" - ) from err + raise ValueError(f"Invalid dialect {kwds['dialect']} provided") from err parser_default = _parser_defaults[param] provided = kwds.get(param, parser_default) @@ -947,13 +946,12 @@ def _refresh_kwargs_based_on_dialect(self, kwds, dialect): conflict_msgs.append(msg) if conflict_msgs: - warnings.warn( - "\n\n".join(conflict_msgs), ParserWarning, stacklevel=2 - ) + warnings.warn("\n\n".join(conflict_msgs), ParserWarning, stacklevel=2) kwds[param] = dialect_val return kwds - def _validate_skipfooter(self, kwds): + @staticmethod + def _validate_skipfooter(kwds): if kwds.get("skipfooter"): if kwds.get("iterator") or kwds.get("chunksize"): raise ValueError("'skipfooter' not supported for 'iteration'") From 4c024da4075e1baedea67d43130c44558da15dab Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 8 Oct 2020 22:59:36 +0700 Subject: [PATCH 06/11] TYP: annotate methods and move to module level --- pandas/io/parsers.py | 119 ++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 58 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 012e0035ff964..6625dbcd4aca2 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -794,9 +794,9 @@ def __init__(self, f, engine=None, **kwds): dialect = kwds["dialect"] if dialect in csv.list_dialects(): dialect = csv.get_dialect(dialect) - kwds = self._refresh_kwargs_based_on_dialect(kwds, dialect) + kwds = _merge_with_dialect_properties(kwds, dialect) - self._validate_skipfooter(kwds) + _validate_skipfooter(kwds) if kwds.get("header", "infer") == "infer": kwds["header"] = 0 if kwds.get("names") is None else None @@ -823,61 +823,6 @@ def __init__(self, f, engine=None, **kwds): def close(self): self._engine.close() - @staticmethod - def _refresh_kwargs_based_on_dialect(kwds, dialect): - """Update kwargs based on the dialect parameters.""" - # Any valid dialect should have these attributes. - # If any are missing, we will raise automatically. - mandatory_dialect_attrs = ( - "delimiter", - "doublequote", - "escapechar", - "skipinitialspace", - "quotechar", - "quoting", - ) - - for param in mandatory_dialect_attrs: - try: - dialect_val = getattr(dialect, param) - except AttributeError as err: - raise ValueError(f"Invalid dialect {kwds['dialect']} provided") from err - - parser_default = _parser_defaults[param] - provided = kwds.get(param, parser_default) - - # Messages for conflicting values between the dialect - # instance and the actual parameters provided. - conflict_msgs = [] - - # Don't warn if the default parameter was passed in, - # even if it conflicts with the dialect (gh-23761). - if provided != parser_default and provided != dialect_val: - msg = ( - f"Conflicting values for '{param}': '{provided}' was " - f"provided, but the dialect specifies '{dialect_val}'. " - "Using the dialect-specified value." - ) - - # Annoying corner case for not warning about - # conflicts between dialect and delimiter parameter. - # Refer to the outer "_read_" function for more info. - if not (param == "delimiter" and kwds.pop("sep_override", False)): - conflict_msgs.append(msg) - - if conflict_msgs: - warnings.warn("\n\n".join(conflict_msgs), ParserWarning, stacklevel=2) - kwds[param] = dialect_val - return kwds - - @staticmethod - def _validate_skipfooter(kwds): - if kwds.get("skipfooter"): - if kwds.get("iterator") or kwds.get("chunksize"): - raise ValueError("'skipfooter' not supported for 'iteration'") - if kwds.get("nrows"): - raise ValueError("'skipfooter' not supported with 'nrows'") - def _get_options_with_defaults(self, engine): kwds = self.orig_options @@ -1037,7 +982,7 @@ def _clean_options(self, options, engine): depr_default = _deprecated_defaults[arg] if result.get(arg, depr_default) != depr_default: msg = ( - f"The {repr(arg)} argument has been deprecated and will be " + f"The {arg} argument has been deprecated and will be " "removed in a future version.\n\n" ) warnings.warn(msg, FutureWarning, stacklevel=2) @@ -3790,3 +3735,61 @@ def _check_defaults_read( kwds["engine_specified"] = False return kwds + + +def _merge_with_dialect_properties( + kwds: Dict[str, Any], + dialect: csv.Dialect, +) -> Dict[str, Any]: + """Update kwargs based on the dialect parameters.""" + # Any valid dialect should have these attributes. + # If any are missing, we will raise automatically. + mandatory_dialect_attrs = ( + "delimiter", + "doublequote", + "escapechar", + "skipinitialspace", + "quotechar", + "quoting", + ) + + for param in mandatory_dialect_attrs: + try: + dialect_val = getattr(dialect, param) + except AttributeError as err: + raise ValueError(f"Invalid dialect {kwds['dialect']} provided") from err + + parser_default = _parser_defaults[param] + provided = kwds.get(param, parser_default) + + # Messages for conflicting values between the dialect + # instance and the actual parameters provided. + conflict_msgs = [] + + # Don't warn if the default parameter was passed in, + # even if it conflicts with the dialect (gh-23761). + if provided != parser_default and provided != dialect_val: + msg = ( + f"Conflicting values for '{param}': '{provided}' was " + f"provided, but the dialect specifies '{dialect_val}'. " + "Using the dialect-specified value." + ) + + # Annoying corner case for not warning about + # conflicts between dialect and delimiter parameter. + # Refer to the outer "_read_" function for more info. + if not (param == "delimiter" and kwds.pop("sep_override", False)): + conflict_msgs.append(msg) + + if conflict_msgs: + warnings.warn("\n\n".join(conflict_msgs), ParserWarning, stacklevel=2) + kwds[param] = dialect_val + return kwds + + +def _validate_skipfooter(kwds: Dict[str, Any]) -> None: + if kwds.get("skipfooter"): + if kwds.get("iterator") or kwds.get("chunksize"): + raise ValueError("'skipfooter' not supported for 'iteration'") + if kwds.get("nrows"): + raise ValueError("'skipfooter' not supported with 'nrows'") From f10984fd96a05bff904865dbf9a7f17748b9de35 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 8 Oct 2020 23:23:34 +0700 Subject: [PATCH 07/11] DOC: add docstrings --- pandas/io/parsers.py | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6625dbcd4aca2..42ddea0e4d376 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -790,14 +790,14 @@ def __init__(self, f, engine=None, **kwds): self.engine = engine self._engine_specified = kwds.get("engine_specified", engine_specified) + _validate_skipfooter(kwds) + if kwds.get("dialect") is not None: dialect = kwds["dialect"] if dialect in csv.list_dialects(): dialect = csv.get_dialect(dialect) kwds = _merge_with_dialect_properties(kwds, dialect) - _validate_skipfooter(kwds) - if kwds.get("header", "infer") == "infer": kwds["header"] = 0 if kwds.get("names") is None else None @@ -3741,7 +3741,26 @@ def _merge_with_dialect_properties( kwds: Dict[str, Any], dialect: csv.Dialect, ) -> Dict[str, Any]: - """Update kwargs based on the dialect parameters.""" + """ + Merge kwargs in TextFileReader with dialect parameters. + + Parameters + ---------- + kwds : dict + Keyword arguments passed to TextFileReader. + dialect : csv.Dialect + Concrete csv dialect. See csv.Dialect documentation for more details. + + Returns + ------- + kwds : dict + Updated keyword arguments, merged with dialect parameters. + + Raises + ------ + ValueError + If incorrect dialect is provided. + """ # Any valid dialect should have these attributes. # If any are missing, we will raise automatically. mandatory_dialect_attrs = ( @@ -3788,6 +3807,19 @@ def _merge_with_dialect_properties( def _validate_skipfooter(kwds: Dict[str, Any]) -> None: + """ + Check whether skipfooter is compatible with other kwargs in TextFileReader. + + Parameters + ---------- + kwds : dict + Keyword arguments passed to TextFileReader. + + Raises + ------ + ValueError + If skipfooter is not compatible with other parameters. + """ if kwds.get("skipfooter"): if kwds.get("iterator") or kwds.get("chunksize"): raise ValueError("'skipfooter' not supported for 'iteration'") From 19f6533fe3620152ef5d86b91e0d5e61f06d9557 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 8 Oct 2020 23:23:52 +0700 Subject: [PATCH 08/11] CLN: read dialect name directly from its instance --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 42ddea0e4d376..2b6b3505d961e 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3776,7 +3776,7 @@ def _merge_with_dialect_properties( try: dialect_val = getattr(dialect, param) except AttributeError as err: - raise ValueError(f"Invalid dialect {kwds['dialect']} provided") from err + raise ValueError(f"Invalid dialect {dialect.__name__} provided") from err parser_default = _parser_defaults[param] provided = kwds.get(param, parser_default) From c8e0bc9bf95050710d6dfe87e22ade4499963302 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 11 Oct 2020 21:40:46 +0700 Subject: [PATCH 09/11] REF: reverse args in _merge_with_dialect_properties --- pandas/io/parsers.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 2b6b3505d961e..a53636a442dc4 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -796,7 +796,7 @@ def __init__(self, f, engine=None, **kwds): dialect = kwds["dialect"] if dialect in csv.list_dialects(): dialect = csv.get_dialect(dialect) - kwds = _merge_with_dialect_properties(kwds, dialect) + kwds = _merge_with_dialect_properties(dialect, kwds) if kwds.get("header", "infer") == "infer": kwds["header"] = 0 if kwds.get("names") is None else None @@ -3738,18 +3738,18 @@ def _check_defaults_read( def _merge_with_dialect_properties( - kwds: Dict[str, Any], dialect: csv.Dialect, + defaults: Dict[str, Any], ) -> Dict[str, Any]: """ - Merge kwargs in TextFileReader with dialect parameters. + Merge default kwargs in TextFileReader with dialect parameters. Parameters ---------- - kwds : dict - Keyword arguments passed to TextFileReader. dialect : csv.Dialect Concrete csv dialect. See csv.Dialect documentation for more details. + defaults : dict + Keyword arguments passed to TextFileReader. Returns ------- @@ -3761,6 +3761,8 @@ def _merge_with_dialect_properties( ValueError If incorrect dialect is provided. """ + kwds = defaults.copy() + # Any valid dialect should have these attributes. # If any are missing, we will raise automatically. mandatory_dialect_attrs = ( From fc1b62749f66d58f78cddf92452b642cd8e766c4 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Sun, 11 Oct 2020 21:48:07 +0700 Subject: [PATCH 10/11] REF: _check_defaults_read -> _refine_defaults_read --- pandas/io/parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a53636a442dc4..20acfc1e4a188 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -604,7 +604,7 @@ def read_csv( del kwds["filepath_or_buffer"] del kwds["sep"] - kwds_defaults = _check_defaults_read( + kwds_defaults = _refine_defaults_read( dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": ","} ) kwds.update(kwds_defaults) @@ -682,7 +682,7 @@ def read_table( del kwds["filepath_or_buffer"] del kwds["sep"] - kwds_defaults = _check_defaults_read( + kwds_defaults = _refine_defaults_read( dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": "\t"} ) kwds.update(kwds_defaults) @@ -3648,7 +3648,7 @@ def _make_reader(self, f): ) -def _check_defaults_read( +def _refine_defaults_read( dialect: Union[str, csv.Dialect], delimiter: Union[str, object], delim_whitespace: bool, @@ -3656,7 +3656,7 @@ def _check_defaults_read( sep: Union[str, object], defaults: Dict[str, Any], ): - """Check default values of input parameters of read_csv, read_table. + """Validate/refine default values of input parameters of read_csv, read_table. Parameters ---------- @@ -3708,7 +3708,7 @@ def _check_defaults_read( # the comparison to dialect values by checking if default values # for BOTH "delimiter" and "sep" were provided. if dialect is not None: - kwds["sep_override"] = (delimiter is None) and ( + kwds["sep_override"] = delimiter is None and ( sep is lib.no_default or sep == delim_default ) From ba4a389728c2496f5d125ff803ef566a88a6ebe1 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 12 Oct 2020 00:02:51 +0700 Subject: [PATCH 11/11] REF: drop getting __name__ attr of dialect --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 53a2c0e1b7b54..866854ff1fe33 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3780,7 +3780,7 @@ def _merge_with_dialect_properties( try: dialect_val = getattr(dialect, param) except AttributeError as err: - raise ValueError(f"Invalid dialect {dialect.__name__} provided") from err + raise ValueError(f"Invalid dialect {dialect} provided") from err parser_default = _parser_defaults[param] provided = kwds.get(param, parser_default)