diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 39e3894f86302..693923522e437 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -653,7 +653,7 @@ I/O - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`) - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`) -- Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`) +- Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`) - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`) - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`) - Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect errors="ignore" (:issue:`44312`) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index f5420618c0235..153ba171199e6 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1048,6 +1048,7 @@ def _get_lines(self, rows=None): assert self.data is not None new_rows.append(next(self.data)) + len_new_rows = len(new_rows) new_rows = self._remove_skipped_rows(new_rows) lines.extend(new_rows) else: @@ -1059,13 +1060,15 @@ def _get_lines(self, rows=None): if new_row is not None: new_rows.append(new_row) + len_new_rows = len(new_rows) except StopIteration: + len_new_rows = len(new_rows) new_rows = self._remove_skipped_rows(new_rows) lines.extend(new_rows) if len(lines) == 0: raise - self.pos += len(new_rows) + self.pos += len_new_rows self.buf = [] else: diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 910731bd7dde2..3f43ea0b8a12d 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -877,3 +877,33 @@ def test_skip_rows_and_n_rows(): result = read_fwf(StringIO(data), nrows=4, skiprows=[2, 4]) expected = DataFrame({"a": [1, 3, 5, 6], "b": ["a", "c", "e", "f"]}) tm.assert_frame_equal(result, expected) + + +def test_skiprows_with_iterator(): + # GH#10261 + data = """0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + """ + df_iter = read_fwf( + StringIO(data), + colspecs=[(0, 2)], + names=["a"], + iterator=True, + chunksize=2, + skiprows=[0, 1, 2, 6, 9], + ) + expected_frames = [ + DataFrame({"a": [3, 4]}), + DataFrame({"a": [5, 7, 8]}, index=[2, 3, 4]), + DataFrame({"a": []}, index=[], dtype="object"), + ] + for i, result in enumerate(df_iter): + tm.assert_frame_equal(result, expected_frames[i])