pandas-dev · jreback · Dec 20, 2014 · Dec 17, 2014
diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
@@ -51,3 +51,4 @@ Bug Fixes
 
 - Fixed compatibility issue in ``DatetimeIndex`` affecting architectures where ``numpy.int_`` defaults to ``numpy.int32`` (:issue:`8943`)
 - Bug in ``MultiIndex.has_duplicates`` when having many levels causes an indexer overflow (:issue:`9075`)
+- Bug in ``read_csv`` when using ``skiprows`` on a file with CR line endings with the C engine (:issue:`9079`)
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -3048,6 +3048,30 @@ def test_comment_skiprows(self):
         df = self.read_csv(StringIO(data), comment='#', skiprows=4)
         tm.assert_almost_equal(df.values, expected)
 
+    def test_skiprows_lineterminator(self):
+        # GH 9079: skiprows=1 must drop the first line for LF, CR and CRLF
+        data = '\n'.join(['SMOSMANIA ThetaProbe-ML2X ',
+                          '2007/01/01 01:00   0.2140 U M ',
+                          '2007/01/01 02:00   0.2141 M O ',
+                          '2007/01/01 04:00   0.2142 D M '])
+        expected = pd.DataFrame([['2007/01/01', '01:00', 0.2140, 'U', 'M'],
+                                 ['2007/01/01', '02:00', 0.2141, 'M', 'O'],
+                                 ['2007/01/01', '04:00', 0.2142, 'D', 'M']],
+                                columns=['date', 'time', 'var', 'flag', 
+                                         'oflag'])
+        # parse the LF-joined data as written, then with CR and CRLF endings
+        df = self.read_csv(StringIO(data), skiprows=1, delim_whitespace=True,
+                           names=['date', 'time', 'var', 'flag', 'oflag'])
+        tm.assert_frame_equal(df, expected)
+        df = self.read_csv(StringIO(data.replace('\n', '\r')),  # CR only
+                           skiprows=1, delim_whitespace=True,
+                           names=['date', 'time', 'var', 'flag', 'oflag'])
+        tm.assert_frame_equal(df, expected)
+        df = self.read_csv(StringIO(data.replace('\n', '\r\n')),  # CRLF
+                           skiprows=1, delim_whitespace=True,
+                           names=['date', 'time', 'var', 'flag', 'oflag'])
+        tm.assert_frame_equal(df, expected)
+
     def test_trailing_spaces(self):
         data = "A B C  \nrandom line with trailing spaces    \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n   \n5.1,NaN,10.0\n"
         expected = pd.DataFrame([[1., 2., 4.],

diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c
@@ -707,6 +707,9 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
 //            TRACE(("tokenize_delimited SKIP_LINE %c, state %d\n", c, self->state));
             if (c == '\n') {
                 END_LINE();
+            } else if (c == '\r') {
+                self->file_lines++;
+                self->state = EAT_CRNL_NOP;
             }
             break;
 
@@ -1304,6 +1307,9 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
 //            TRACE(("tokenize_whitespace SKIP_LINE %c, state %d\n", c, self->state));
             if (c == '\n') {
                 END_LINE();
+            } else if (c == '\r') {
+                self->file_lines++;
+                self->state = EAT_CRNL_NOP;
             }
             break;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -51,3 +51,4 @@ Bug Fixes

		- Fixed compatibility issue in ``DatetimeIndex`` affecting architectures where ``numpy.int_`` defaults to ``numpy.int32`` (:issue:`8943`)
		- Bug in ``MultiIndex.has_duplicates`` when having many levels causes an indexer overflow (:issue:`9075`)
		- Bug in ``read_csv`` when using ``skiprows`` on a file with CR line endings with the C engine (:issue:`9079`)