pandas-dev · jreback · Sep 24, 2013 · Sep 24, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -457,6 +457,7 @@ Bug Fixes
     weren't strings (:issue:`4956`)
   - Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep
     separate metadata. (:issue:`4202`, :issue:`4830`)
+  - Fixed ``skiprows`` option in Python parser for ``read_csv`` (:issue:`4382`)
 
 pandas 0.12.0
 -------------

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1283,7 +1283,6 @@ def __init__(self, f, **kwds):
 
         # needs to be cleaned/refactored
         # multiple date column thing turning into a real spaghetti factory
-
         if not self._has_complex_date_col:
             (index_names,
              self.orig_names, _) = self._get_index_name(self.columns)
@@ -1561,8 +1560,6 @@ def _get_index_name(self, columns):
         except StopIteration:
             next_line = None
 
-        index_name = None
-
         # implicitly index_col=0 b/c 1 fewer column names
         implicit_first_cols = 0
         if line is not None:
@@ -1647,11 +1644,20 @@ def _get_lines(self, rows=None):
                 if self.pos > len(source):
                     raise StopIteration
                 if rows is None:
-                    lines.extend(source[self.pos:])
-                    self.pos = len(source)
+                    new_rows = source[self.pos:]
+                    new_pos = len(source)
                 else:
-                    lines.extend(source[self.pos:self.pos + rows])
-                    self.pos += rows
+                    new_rows = source[self.pos:self.pos + rows]
+                    new_pos = self.pos + rows
+
+                # Drop rows listed in skiprows. n.b.: self.skiprows is a set.
+                if self.skiprows:
+                    new_rows = [row for i, row in enumerate(new_rows)
+                                if i + self.pos not in self.skiprows]
+
+                lines.extend(new_rows)
+                self.pos = new_pos
+
             else:
                 new_rows = []
                 try:
@@ -1673,6 +1679,9 @@ def _get_lines(self, rows=None):
                                     raise Exception(msg)
                                 raise
                 except StopIteration:
+                    if self.skiprows:
+                        new_rows = [row for i, row in enumerate(new_rows)
+                                    if self.pos + i not in self.skiprows]
                     lines.extend(new_rows)
                     if len(lines) == 0:
                         raise

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -735,6 +735,14 @@ def test_skiprows_bug(self):
         tm.assert_frame_equal(data, expected)
         tm.assert_frame_equal(data, data2)
 
+    def test_deep_skiprows(self):
+        # GH #4382: skiprows must also drop rows in the middle of the data
+        text = "a,b,c\n" + "\n".join([",".join([str(i), str(i+1), str(i+2)]) for i in range(10)])
+        condensed_text = "a,b,c\n" + "\n".join([",".join([str(i), str(i+1), str(i+2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9]])
+        data = self.read_csv(StringIO(text), skiprows=[6, 8])
+        condensed_data = self.read_csv(StringIO(condensed_text))
+        tm.assert_frame_equal(data, condensed_data)
+
     def test_detect_string_na(self):
         data = """A,B
 foo,bar