Skip to content

Commit 66f3917

Browse files
authored
REF/CLN: pandas/io/parsers.py (#36852)
1 parent e4e1b63 commit 66f3917

File tree

1 file changed

+107
-71
lines changed

1 file changed

+107
-71
lines changed

pandas/io/parsers.py

+107-71
Original file line number | Diff line number | Diff line change
@@ -606,7 +606,7 @@ def read_csv(
606606
del kwds["filepath_or_buffer"]
607607
del kwds["sep"]
608608

609-
kwds_defaults = _check_defaults_read(
609+
kwds_defaults = _refine_defaults_read(
610610
dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": ","}
611611
)
612612
kwds.update(kwds_defaults)
@@ -684,7 +684,7 @@ def read_table(
684684
del kwds["filepath_or_buffer"]
685685
del kwds["sep"]
686686

687-
kwds_defaults = _check_defaults_read(
687+
kwds_defaults = _refine_defaults_read(
688688
dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": "\t"}
689689
)
690690
kwds.update(kwds_defaults)
@@ -789,71 +789,23 @@ def __init__(self, f, engine=None, **kwds):
789789
else:
790790
engine = "python"
791791
engine_specified = False
792-
792+
self.engine = engine
793793
self._engine_specified = kwds.get("engine_specified", engine_specified)
794794

795+
_validate_skipfooter(kwds)
796+
795797
if kwds.get("dialect") is not None:
796798
dialect = kwds["dialect"]
797799
if dialect in csv.list_dialects():
798800
dialect = csv.get_dialect(dialect)
799-
800-
# Any valid dialect should have these attributes.
801-
# If any are missing, we will raise automatically.
802-
for param in (
803-
"delimiter",
804-
"doublequote",
805-
"escapechar",
806-
"skipinitialspace",
807-
"quotechar",
808-
"quoting",
809-
):
810-
try:
811-
dialect_val = getattr(dialect, param)
812-
except AttributeError as err:
813-
raise ValueError(
814-
f"Invalid dialect {kwds['dialect']} provided"
815-
) from err
816-
parser_default = _parser_defaults[param]
817-
provided = kwds.get(param, parser_default)
818-
819-
# Messages for conflicting values between the dialect
820-
# instance and the actual parameters provided.
821-
conflict_msgs = []
822-
823-
# Don't warn if the default parameter was passed in,
824-
# even if it conflicts with the dialect (gh-23761).
825-
if provided != parser_default and provided != dialect_val:
826-
msg = (
827-
f"Conflicting values for '{param}': '{provided}' was "
828-
f"provided, but the dialect specifies '{dialect_val}'. "
829-
"Using the dialect-specified value."
830-
)
831-
832-
# Annoying corner case for not warning about
833-
# conflicts between dialect and delimiter parameter.
834-
# Refer to the outer "_read_" function for more info.
835-
if not (param == "delimiter" and kwds.pop("sep_override", False)):
836-
conflict_msgs.append(msg)
837-
838-
if conflict_msgs:
839-
warnings.warn(
840-
"\n\n".join(conflict_msgs), ParserWarning, stacklevel=2
841-
)
842-
kwds[param] = dialect_val
843-
844-
if kwds.get("skipfooter"):
845-
if kwds.get("iterator") or kwds.get("chunksize"):
846-
raise ValueError("'skipfooter' not supported for 'iteration'")
847-
if kwds.get("nrows"):
848-
raise ValueError("'skipfooter' not supported with 'nrows'")
801+
kwds = _merge_with_dialect_properties(dialect, kwds)
849802

850803
if kwds.get("header", "infer") == "infer":
851804
kwds["header"] = 0 if kwds.get("names") is None else None
852805

853806
self.orig_options = kwds
854807

855808
# miscellanea
856-
self.engine = engine
857809
self._currow = 0
858810

859811
options = self._get_options_with_defaults(engine)
@@ -928,7 +880,6 @@ def _check_file_or_buffer(self, f, engine):
928880
def _clean_options(self, options, engine):
929881
result = options.copy()
930882

931-
engine_specified = self._engine_specified
932883
fallback_reason = None
933884

934885
# C engine not supported yet
@@ -992,7 +943,7 @@ def _clean_options(self, options, engine):
992943
)
993944
engine = "python"
994945

995-
if fallback_reason and engine_specified:
946+
if fallback_reason and self._engine_specified:
996947
raise ValueError(fallback_reason)
997948

998949
if engine == "c":
@@ -1028,25 +979,18 @@ def _clean_options(self, options, engine):
1028979

1029980
validate_header_arg(options["header"])
1030981

1031-
depr_warning = ""
1032-
1033982
for arg in _deprecated_args:
1034983
parser_default = _c_parser_defaults[arg]
1035984
depr_default = _deprecated_defaults[arg]
1036-
1037-
msg = (
1038-
f"The {repr(arg)} argument has been deprecated and will be "
1039-
"removed in a future version."
1040-
)
1041-
1042985
if result.get(arg, depr_default) != depr_default:
1043-
depr_warning += msg + "\n\n"
986+
msg = (
987+
f"The {arg} argument has been deprecated and will be "
988+
"removed in a future version.\n\n"
989+
)
990+
warnings.warn(msg, FutureWarning, stacklevel=2)
1044991
else:
1045992
result[arg] = parser_default
1046993

1047-
if depr_warning != "":
1048-
warnings.warn(depr_warning, FutureWarning, stacklevel=2)
1049-
1050994
if index_col is True:
1051995
raise ValueError("The value of index_col couldn't be 'True'")
1052996
if _is_index_col(index_col):
@@ -3706,15 +3650,15 @@ def _make_reader(self, f):
37063650
)
37073651

37083652

3709-
def _check_defaults_read(
3653+
def _refine_defaults_read(
37103654
dialect: Union[str, csv.Dialect],
37113655
delimiter: Union[str, object],
37123656
delim_whitespace: bool,
37133657
engine: str,
37143658
sep: Union[str, object],
37153659
defaults: Dict[str, Any],
37163660
):
3717-
"""Check default values of input parameters of read_csv, read_table.
3661+
"""Validate/refine default values of input parameters of read_csv, read_table.
37183662
37193663
Parameters
37203664
----------
@@ -3766,7 +3710,7 @@ def _check_defaults_read(
37663710
# the comparison to dialect values by checking if default values
37673711
# for BOTH "delimiter" and "sep" were provided.
37683712
if dialect is not None:
3769-
kwds["sep_override"] = (delimiter is None) and (
3713+
kwds["sep_override"] = delimiter is None and (
37703714
sep is lib.no_default or sep == delim_default
37713715
)
37723716

@@ -3793,3 +3737,95 @@ def _check_defaults_read(
37933737
kwds["engine_specified"] = False
37943738

37953739
return kwds
3740+
3741+
3742+
def _merge_with_dialect_properties(
    dialect: csv.Dialect,
    defaults: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Merge default kwargs in TextFileReader with dialect parameters.

    For every mandatory dialect attribute, the value carried by ``dialect``
    replaces the corresponding entry in the keyword arguments. When the
    caller explicitly passed a non-default value that disagrees with the
    dialect, a ``ParserWarning`` is emitted and the dialect value still wins.

    Parameters
    ----------
    dialect : csv.Dialect
        Concrete csv dialect. See csv.Dialect documentation for more details.
    defaults : dict
        Keyword arguments passed to TextFileReader. The mapping itself is
        not modified; a shallow copy is updated and returned.

    Returns
    -------
    kwds : dict
        Updated keyword arguments, merged with dialect parameters.

    Raises
    ------
    ValueError
        If incorrect dialect is provided, i.e. it lacks one of the
        mandatory attributes.

    Warns
    -----
    ParserWarning
        If an explicitly provided, non-default parameter conflicts with the
        value specified by the dialect.
    """
    kwds = defaults.copy()

    # Any valid dialect should have these attributes.
    # If any are missing, we will raise automatically.
    mandatory_dialect_attrs = (
        "delimiter",
        "doublequote",
        "escapechar",
        "skipinitialspace",
        "quotechar",
        "quoting",
    )

    for param in mandatory_dialect_attrs:
        try:
            dialect_val = getattr(dialect, param)
        except AttributeError as err:
            raise ValueError(f"Invalid dialect {dialect} provided") from err

        parser_default = _parser_defaults[param]
        provided = kwds.get(param, parser_default)

        # Don't warn if the default parameter was passed in,
        # even if it conflicts with the dialect (gh-23761).
        if provided != parser_default and provided != dialect_val:
            # Annoying corner case for not warning about
            # conflicts between dialect and delimiter parameter.
            # Refer to the outer "_read_" function for more info.
            # NOTE: "sep_override" is only consumed (popped) when this
            # conflict branch is reached for "delimiter"; the short-circuit
            # keeps the flag untouched for every other parameter.
            if not (param == "delimiter" and kwds.pop("sep_override", False)):
                msg = (
                    f"Conflicting values for '{param}': '{provided}' was "
                    f"provided, but the dialect specifies '{dialect_val}'. "
                    "Using the dialect-specified value."
                )
                # stacklevel=3: this helper is called from
                # TextFileReader.__init__, so one extra frame must be skipped
                # to attribute the warning to __init__'s caller, as the
                # inline code did before it was extracted.
                warnings.warn(msg, ParserWarning, stacklevel=3)

        # The dialect always takes precedence over the provided value.
        kwds[param] = dialect_val
    return kwds
3811+
3812+
3813+
def _validate_skipfooter(kwds: Dict[str, Any]) -> None:
3814+
"""
3815+
Check whether skipfooter is compatible with other kwargs in TextFileReader.
3816+
3817+
Parameters
3818+
----------
3819+
kwds : dict
3820+
Keyword arguments passed to TextFileReader.
3821+
3822+
Raises
3823+
------
3824+
ValueError
3825+
If skipfooter is not compatible with other parameters.
3826+
"""
3827+
if kwds.get("skipfooter"):
3828+
if kwds.get("iterator") or kwds.get("chunksize"):
3829+
raise ValueError("'skipfooter' not supported for 'iteration'")
3830+
if kwds.get("nrows"):
3831+
raise ValueError("'skipfooter' not supported with 'nrows'")

0 commit comments

Comments
 (0)