Skip to content

Commit d6723fd

Browse files
authored
BUG: read_fwf not handling skiprows correctly with iterator (#44621)
1 parent a164326 commit d6723fd

File tree

3 files changed

+35
-2
lines changed

3 files changed

+35
-2
lines changed

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ I/O
653653
- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
654654
- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raised an uncontrolled ``IndexError`` (:issue:`43102`)
655655
- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
656-
- Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`)
656+
- Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`)
657657
- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
658658
- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
659659
- Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect ``errors="ignore"`` (:issue:`44312`)

pandas/io/parsers/python_parser.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1048,6 +1048,7 @@ def _get_lines(self, rows=None):
10481048
assert self.data is not None
10491049
new_rows.append(next(self.data))
10501050

1051+
len_new_rows = len(new_rows)
10511052
new_rows = self._remove_skipped_rows(new_rows)
10521053
lines.extend(new_rows)
10531054
else:
@@ -1059,13 +1060,15 @@ def _get_lines(self, rows=None):
10591060

10601061
if new_row is not None:
10611062
new_rows.append(new_row)
1063+
len_new_rows = len(new_rows)
10621064

10631065
except StopIteration:
1066+
len_new_rows = len(new_rows)
10641067
new_rows = self._remove_skipped_rows(new_rows)
10651068
lines.extend(new_rows)
10661069
if len(lines) == 0:
10671070
raise
1068-
self.pos += len(new_rows)
1071+
self.pos += len_new_rows
10691072

10701073
self.buf = []
10711074
else:

pandas/tests/io/parser/test_read_fwf.py

+30
Original file line numberDiff line numberDiff line change
@@ -877,3 +877,33 @@ def test_skip_rows_and_n_rows():
877877
result = read_fwf(StringIO(data), nrows=4, skiprows=[2, 4])
878878
expected = DataFrame({"a": [1, 3, 5, 6], "b": ["a", "c", "e", "f"]})
879879
tm.assert_frame_equal(result, expected)
880+
881+
882+
def test_skiprows_with_iterator():
883+
# GH#10261
884+
data = """0
885+
1
886+
2
887+
3
888+
4
889+
5
890+
6
891+
7
892+
8
893+
9
894+
"""
895+
df_iter = read_fwf(
896+
StringIO(data),
897+
colspecs=[(0, 2)],
898+
names=["a"],
899+
iterator=True,
900+
chunksize=2,
901+
skiprows=[0, 1, 2, 6, 9],
902+
)
903+
expected_frames = [
904+
DataFrame({"a": [3, 4]}),
905+
DataFrame({"a": [5, 7, 8]}, index=[2, 3, 4]),
906+
DataFrame({"a": []}, index=[], dtype="object"),
907+
]
908+
for i, result in enumerate(df_iter):
909+
tm.assert_frame_equal(result, expected_frames[i])

0 commit comments

Comments
 (0)