Skip to content

Commit efb7b0d

Browse files
committed
Merge pull request #4969 from guyrt/issue-4382
BUG: fix skiprows option for python parser in read_csv
2 parents 151ba51 + 079f3f1 commit efb7b0d

File tree

3 files changed

+25
-7
lines changed

3 files changed

+25
-7
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@ Bug Fixes
458458
weren't strings (:issue:`4956`)
459459
- Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep
460460
separate metadata. (:issue:`4202`, :issue:`4830`)
461+
- Fixed ``skiprows`` option in Python parser for ``read_csv`` (:issue:`4382`)
461462

462463
pandas 0.12.0
463464
-------------

pandas/io/parsers.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,6 @@ def __init__(self, f, **kwds):
12831283

12841284
# needs to be cleaned/refactored
12851285
# multiple date column thing turning into a real spaghetti factory
1286-
12871286
if not self._has_complex_date_col:
12881287
(index_names,
12891288
self.orig_names, _) = self._get_index_name(self.columns)
@@ -1561,8 +1560,6 @@ def _get_index_name(self, columns):
15611560
except StopIteration:
15621561
next_line = None
15631562

1564-
index_name = None
1565-
15661563
# implicitly index_col=0 b/c 1 fewer column names
15671564
implicit_first_cols = 0
15681565
if line is not None:
@@ -1647,11 +1644,20 @@ def _get_lines(self, rows=None):
16471644
if self.pos > len(source):
16481645
raise StopIteration
16491646
if rows is None:
1650-
lines.extend(source[self.pos:])
1651-
self.pos = len(source)
1647+
new_rows = source[self.pos:]
1648+
new_pos = len(source)
16521649
else:
1653-
lines.extend(source[self.pos:self.pos + rows])
1654-
self.pos += rows
1650+
new_rows = source[self.pos:self.pos + rows]
1651+
new_pos = self.pos + rows
1652+
1653+
# Drop rows whose position is listed in skiprows. n.b.: self.skiprows is a set.
1654+
if self.skiprows:
1655+
new_rows = [row for i, row in enumerate(new_rows)
1656+
if i + self.pos not in self.skiprows]
1657+
1658+
lines.extend(new_rows)
1659+
self.pos = new_pos
1660+
16551661
else:
16561662
new_rows = []
16571663
try:
@@ -1673,6 +1679,9 @@ def _get_lines(self, rows=None):
16731679
raise Exception(msg)
16741680
raise
16751681
except StopIteration:
1682+
if self.skiprows:
1683+
new_rows = [row for i, row in enumerate(new_rows)
1684+
if self.pos + i not in self.skiprows]
16761685
lines.extend(new_rows)
16771686
if len(lines) == 0:
16781687
raise

pandas/io/tests/test_parsers.py

+8
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,14 @@ def test_skiprows_bug(self):
735735
tm.assert_frame_equal(data, expected)
736736
tm.assert_frame_equal(data, data2)
737737

738+
def test_deep_skiprows(self):
739+
# GH #4382
740+
text = "a,b,c\n" + "\n".join([",".join([str(i), str(i+1), str(i+2)]) for i in range(10)])
741+
condensed_text = "a,b,c\n" + "\n".join([",".join([str(i), str(i+1), str(i+2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9]])
742+
data = self.read_csv(StringIO(text), skiprows=[6, 8])
743+
condensed_data = self.read_csv(StringIO(condensed_text))
744+
tm.assert_frame_equal(data, condensed_data)
745+
738746
def test_detect_string_na(self):
739747
data = """A,B
740748
foo,bar

0 commit comments

Comments
 (0)