From e3003cf0a34b010ce696747bf1ca886017854d70 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 10 Sep 2020 22:13:48 +0700 Subject: [PATCH 1/8] CLN: remove outdated comment --- pandas/io/parsers.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4c619a636f057..f74adbab2f671 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -421,10 +421,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): kwds["encoding"] = encoding compression = kwds.get("compression", "infer") - # TODO: get_filepath_or_buffer could return - # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] - # though mypy handling of conditional imports is difficult. - # See https://github.com/python/mypy/issues/1297 ioargs = get_filepath_or_buffer( filepath_or_buffer, encoding, compression, storage_options=storage_options ) From 41096b4c802e88ca99f53969ee9b5c5e0ffd64ce Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 10 Sep 2020 22:29:14 +0700 Subject: [PATCH 2/8] REF: place vars declaration closer to the use --- pandas/io/parsers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f74adbab2f671..967c01155a1dc 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -991,16 +991,15 @@ def _clean_options(self, options, engine): engine_specified = self._engine_specified fallback_reason = None - sep = options["delimiter"] - delim_whitespace = options["delim_whitespace"] - # C engine not supported yet if engine == "c": if options["skipfooter"] > 0: fallback_reason = "the 'c' engine does not support skipfooter" engine = "python" - encoding = sys.getfilesystemencoding() or "utf-8" + sep = options["delimiter"] + delim_whitespace = options["delim_whitespace"] + if sep is None and not delim_whitespace: if engine == "c": fallback_reason = ( @@ -1025,6 +1024,7 @@ def _clean_options(self, options, engine): result["delimiter"] = r"\s+" elif sep is not None: encodeable = True + encoding = sys.getfilesystemencoding() or "utf-8" try: if len(sep.encode(encoding)) > 1: encodeable = False From 3f95fcdabdad5da054ee15040e9165792bd769ab Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 10 Sep 2020 23:05:53 +0700 Subject: [PATCH 3/8] CLN: remove return in _check_file_or_buffer --- pandas/io/parsers.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 967c01155a1dc..1e97a383672e7 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -919,8 +919,7 @@ def __init__(self, f, engine=None, **kwds): self.nrows = options.pop("nrows", None) self.squeeze = options.pop("squeeze", False) - # might mutate self.engine - self.engine = self._check_file_or_buffer(f, engine) + self._check_file_or_buffer(f, engine) self.options, self.engine = self._clean_options(options, engine) if "has_index_names" in kwds: @@ -983,8 +982,6 @@ def _check_file_or_buffer(self, f, engine): msg = "The 'python' engine cannot iterate through this file buffer." raise ValueError(msg) - return engine - def _clean_options(self, options, engine): result = options.copy() From b844c77343ca692041ff9d6b1b3bd1fdd95e5de8 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 10 Sep 2020 23:11:12 +0700 Subject: [PATCH 4/8] REF: use dict for engine mapping --- pandas/io/parsers.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 1e97a383672e7..1a454e9daa4a3 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -925,7 +925,7 @@ def __init__(self, f, engine=None, **kwds): if "has_index_names" in kwds: self.options["has_index_names"] = kwds["has_index_names"] - self._make_engine(self.engine) + self._engine = self._make_engine(self.engine) def close(self): self._engine.close() @@ -1154,19 +1154,20 @@ def __next__(self): raise def _make_engine(self, engine="c"): - if engine == "c": - self._engine = CParserWrapper(self.f, **self.options) + mapping = { + "c": CParserWrapper, + "python": PythonParser, + "python-fwf": FixedWidthFieldParser, + } + try: + klass = mapping[engine] + except KeyError: + raise ValueError( + f"Unknown engine: {engine} (valid options " + f"are {mapping.keys()})" + ) else: - if engine == "python": - klass = PythonParser - elif engine == "python-fwf": - klass = FixedWidthFieldParser - else: - raise ValueError( - f"Unknown engine: {engine} (valid options " - 'are "c", "python", or "python-fwf")' - ) - self._engine = klass(self.f, **self.options) + return klass(self.f, **self.options) def _failover_to_python(self): raise AbstractMethodError(self) From 8f4981b1a1dcb77acdbc2c9ad9ed114bcd28b8ae Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 10 Sep 2020 23:13:55 +0700 Subject: [PATCH 5/8] CLN: drop _engine assignment to None --- pandas/io/parsers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 1a454e9daa4a3..b791a3d304e84 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -910,7 +910,6 @@ def __init__(self, f, engine=None, **kwds): # miscellanea self.engine = engine - self._engine = None self._currow = 0 options = self._get_options_with_defaults(engine) From e5541ba5d5e3d4a6d7c2e15680e234bcd2877f87 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 10 Sep 2020 23:22:40 +0700 Subject: [PATCH 6/8] CLN: reformat error message string --- pandas/io/parsers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b791a3d304e84..9e10bdaa14b5f 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1162,8 +1162,7 @@ def _make_engine(self, engine="c"): klass = mapping[engine] except KeyError: raise ValueError( - f"Unknown engine: {engine} (valid options " - f"are {mapping.keys()})" + f"Unknown engine: {engine} (valid options are {mapping.keys()})" ) else: return klass(self.f, **self.options) From fb4a04b39cf4876bb8175c5e2f1e5655a875ff15 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Fri, 11 Sep 2020 10:54:15 +0700 Subject: [PATCH 7/8] CLN: remove unnecessary method _create_index The method _create_index does only tuple unpacking. --- pandas/io/parsers.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9e10bdaa14b5f..a8b6c444c2543 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1175,7 +1175,7 @@ def read(self, nrows=None): ret = self._engine.read(nrows) # May alter columns / col_dict - index, columns, col_dict = self._create_index(ret) + index, columns, col_dict = ret if index is None: if col_dict: @@ -1195,10 +1195,6 @@ def read(self, nrows=None): return df[df.columns[0]].copy() return df - def _create_index(self, ret): - index, columns, col_dict = ret - return index, columns, col_dict - def get_chunk(self, size=None): if size is None: size = self.chunksize From 1d61bf0a0f4e6496631faeecfd4ca5f7efe22ed7 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Fri, 11 Sep 2020 10:57:14 +0700 Subject: [PATCH 8/8] CLN: unpack values right away, remove comment --- pandas/io/parsers.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a8b6c444c2543..b963d5be69b5f 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1172,10 +1172,7 @@ def _failover_to_python(self): def read(self, nrows=None): nrows = validate_integer("nrows", nrows) - ret = self._engine.read(nrows) - - # May alter columns / col_dict - index, columns, col_dict = ret + index, columns, col_dict = self._engine.read(nrows) if index is None: if col_dict: