Skip to content

Commit 4dc5887

Browse files
authored
CLN: pandas/io/parsers.py (#36269)
1 parent 2da7c34 commit 4dc5887

File tree

1 file changed

+19
-34
lines changed

1 file changed

+19
-34
lines changed

pandas/io/parsers.py

+19 -34
Original file line number | Diff line number | Diff line change
@@ -421,10 +421,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
421421
kwds["encoding"] = encoding
422422
compression = kwds.get("compression", "infer")
423423

424-
# TODO: get_filepath_or_buffer could return
425-
# Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
426-
# though mypy handling of conditional imports is difficult.
427-
# See https://github.com/python/mypy/issues/1297
428424
ioargs = get_filepath_or_buffer(
429425
filepath_or_buffer, encoding, compression, storage_options=storage_options
430426
)
@@ -914,7 +910,6 @@ def __init__(self, f, engine=None, **kwds):
914910

915911
# miscellanea
916912
self.engine = engine
917-
self._engine = None
918913
self._currow = 0
919914

920915
options = self._get_options_with_defaults(engine)
@@ -923,14 +918,13 @@ def __init__(self, f, engine=None, **kwds):
923918
self.nrows = options.pop("nrows", None)
924919
self.squeeze = options.pop("squeeze", False)
925920

926-
# might mutate self.engine
927-
self.engine = self._check_file_or_buffer(f, engine)
921+
self._check_file_or_buffer(f, engine)
928922
self.options, self.engine = self._clean_options(options, engine)
929923

930924
if "has_index_names" in kwds:
931925
self.options["has_index_names"] = kwds["has_index_names"]
932926

933-
self._make_engine(self.engine)
927+
self._engine = self._make_engine(self.engine)
934928

935929
def close(self):
936930
self._engine.close()
@@ -987,24 +981,21 @@ def _check_file_or_buffer(self, f, engine):
987981
msg = "The 'python' engine cannot iterate through this file buffer."
988982
raise ValueError(msg)
989983

990-
return engine
991-
992984
def _clean_options(self, options, engine):
993985
result = options.copy()
994986

995987
engine_specified = self._engine_specified
996988
fallback_reason = None
997989

998-
sep = options["delimiter"]
999-
delim_whitespace = options["delim_whitespace"]
1000-
1001990
# C engine not supported yet
1002991
if engine == "c":
1003992
if options["skipfooter"] > 0:
1004993
fallback_reason = "the 'c' engine does not support skipfooter"
1005994
engine = "python"
1006995

1007-
encoding = sys.getfilesystemencoding() or "utf-8"
996+
sep = options["delimiter"]
997+
delim_whitespace = options["delim_whitespace"]
998+
1008999
if sep is None and not delim_whitespace:
10091000
if engine == "c":
10101001
fallback_reason = (
@@ -1029,6 +1020,7 @@ def _clean_options(self, options, engine):
10291020
result["delimiter"] = r"\s+"
10301021
elif sep is not None:
10311022
encodeable = True
1023+
encoding = sys.getfilesystemencoding() or "utf-8"
10321024
try:
10331025
if len(sep.encode(encoding)) > 1:
10341026
encodeable = False
@@ -1161,29 +1153,26 @@ def __next__(self):
11611153
raise
11621154

11631155
def _make_engine(self, engine="c"):
1164-
if engine == "c":
1165-
self._engine = CParserWrapper(self.f, **self.options)
1156+
mapping = {
1157+
"c": CParserWrapper,
1158+
"python": PythonParser,
1159+
"python-fwf": FixedWidthFieldParser,
1160+
}
1161+
try:
1162+
klass = mapping[engine]
1163+
except KeyError:
1164+
raise ValueError(
1165+
f"Unknown engine: {engine} (valid options are {mapping.keys()})"
1166+
)
11661167
else:
1167-
if engine == "python":
1168-
klass = PythonParser
1169-
elif engine == "python-fwf":
1170-
klass = FixedWidthFieldParser
1171-
else:
1172-
raise ValueError(
1173-
f"Unknown engine: {engine} (valid options "
1174-
'are "c", "python", or "python-fwf")'
1175-
)
1176-
self._engine = klass(self.f, **self.options)
1168+
return klass(self.f, **self.options)
11771169

11781170
def _failover_to_python(self):
11791171
raise AbstractMethodError(self)
11801172

11811173
def read(self, nrows=None):
11821174
nrows = validate_integer("nrows", nrows)
1183-
ret = self._engine.read(nrows)
1184-
1185-
# May alter columns / col_dict
1186-
index, columns, col_dict = self._create_index(ret)
1175+
index, columns, col_dict = self._engine.read(nrows)
11871176

11881177
if index is None:
11891178
if col_dict:
@@ -1203,10 +1192,6 @@ def read(self, nrows=None):
12031192
return df[df.columns[0]].copy()
12041193
return df
12051194

1206-
def _create_index(self, ret):
1207-
index, columns, col_dict = ret
1208-
return index, columns, col_dict
1209-
12101195
def get_chunk(self, size=None):
12111196
if size is None:
12121197
size = self.chunksize

0 commit comments

Comments
 (0)