From 39ab28ad283444e6c420355163346799aa8314a6 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 14 Nov 2018 14:50:58 -0800 Subject: [PATCH 1/2] ERR: Fail-fast with incompatible skipfooter combos * Don't create the iterator and error immediately if the skipfooter parameter is passed in. * Raise the correct error message when nrows is passed in with skipfooter. --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/io/parsers.py | 11 ++++++----- pandas/tests/io/parser/common.py | 21 +++++++++++++++------ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 007f5b7feb060..34d0814d240c8 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1368,6 +1368,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) +- Bug in :meth:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue`23711`) - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) - Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`) - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9fd35effe1b07..63562e41e2281 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -787,6 +787,12 @@ def __init__(self, f, engine=None, **kwds): stacklevel=2) kwds[param] = dialect_val + if kwds.get("skipfooter"): + if kwds.get("iterator") or kwds.get("chunksize"): + raise ValueError("'skipfooter' not supported for 'iteration'") + if kwds.get("nrows"): + raise ValueError("'skipfooter' not supported with 'nrows'") + if kwds.get('header', 'infer') == 'infer': kwds['header'] = 0 if kwds.get('names') is None else None @@ -1054,11 +1060,6 @@ def _failover_to_python(self): def read(self, nrows=None): nrows = _validate_integer('nrows', nrows) - - if nrows is not None: - if self.options.get('skipfooter'): - raise ValueError('skipfooter not supported for iteration') - ret = self._engine.read(nrows) # May alter columns / col_dict diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 67a3bd6f9b75e..6ce08f10e2d00 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -537,12 +537,21 @@ def test_iterator(self): assert len(result) == 3 tm.assert_frame_equal(pd.concat(result), expected) - # skipfooter is not supported with the C parser yet - if self.engine == 'python': - # test bad parameter (skipfooter) - reader = self.read_csv(StringIO(self.data1), index_col=0, - iterator=True, skipfooter=1) - pytest.raises(ValueError, reader.read, 3) + @pytest.mark.parametrize("kwargs", [ + dict(iterator=True, + chunksize=1), + dict(iterator=True), + dict(chunksize=1) + ]) + def test_iterator_skipfooter_errors(self, kwargs): + msg = "'skipfooter' not supported for 'iteration'" + with pytest.raises(ValueError, match=msg): + self.read_csv(StringIO(self.data1), skipfooter=1, **kwargs) + + def test_nrows_skipfooter_errors(self): + msg = "'skipfooter' not supported with 'nrows'" + with pytest.raises(ValueError, match=msg): + self.read_csv(StringIO(self.data1), skipfooter=1, nrows=5) def test_pass_names_with_index(self): lines = self.data1.split('\n') From cca6501dd674ed7d253fe5051c2b97292171a46e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 15 Nov 2018 11:55:32 -0800 Subject: [PATCH 2/2] Fix doc lint errors --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 34d0814d240c8..34774f76de213 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1368,7 +1368,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) -- Bug in :meth:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue`23711`) +- Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`) - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) - Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`) - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`)