Skip to content

Commit 9e4e447

Browse files
committed
Merge pull request pandas-dev#9834 from evanpw/skip_rows_blank
BUG: skiprows doesn't handle blank lines properly when engine='c'
2 parents a4ae0cf + e67893f commit 9e4e447

File tree

3 files changed

+29
-12
lines changed

3 files changed

+29
-12
lines changed

doc/source/whatsnew/v0.16.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,4 @@ Bug Fixes
9595
- Bug in ``FloatArrayFormatter`` where decision boundary for displaying "small" floats in decimal format is off by one order of magnitude for a given display.precision (:issue:`9764`)
9696

9797
- Fixed bug where ``DataFrame.plot()`` raised an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings (:issue:`9671`)
98+
- Bug in ``read_csv`` and ``read_table`` when using the ``skiprows`` parameter if blank lines are present (:issue:`9832`)

pandas/io/tests/test_parsers.py

+22
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,28 @@ def test_deep_skiprows(self):
839839
condensed_data = self.read_csv(StringIO(condensed_text))
840840
tm.assert_frame_equal(data, condensed_data)
841841

842+
def test_skiprows_blank(self):
843+
# GH 9832
844+
text = """#foo,a,b,c
845+
#foo,a,b,c
846+
847+
#foo,a,b,c
848+
#foo,a,b,c
849+
850+
1/1/2000,1.,2.,3.
851+
1/2/2000,4,5,6
852+
1/3/2000,7,8,9
853+
"""
854+
data = self.read_csv(StringIO(text), skiprows=6, header=None,
855+
index_col=0, parse_dates=True)
856+
857+
expected = DataFrame(np.arange(1., 10.).reshape((3, 3)),
858+
columns=[1, 2, 3],
859+
index=[datetime(2000, 1, 1), datetime(2000, 1, 2),
860+
datetime(2000, 1, 3)])
861+
expected.index.name = 0
862+
tm.assert_frame_equal(data, expected)
863+
842864
def test_detect_string_na(self):
843865
data = """A,B
844866
foo,bar

pandas/src/parser/tokenizer.c

+6-12
Original file line numberDiff line numberDiff line change
@@ -757,11 +757,9 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
757757
case START_RECORD:
758758
// start of record
759759
if (skip_this_line(self, self->file_lines)) {
760+
self->state = SKIP_LINE;
760761
if (c == '\n') {
761-
END_LINE()
762-
}
763-
else {
764-
self->state = SKIP_LINE;
762+
END_LINE();
765763
}
766764
break;
767765
}
@@ -1093,11 +1091,9 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit)
10931091
case START_RECORD:
10941092
// start of record
10951093
if (skip_this_line(self, self->file_lines)) {
1094+
self->state = SKIP_LINE;
10961095
if (c == self->lineterminator) {
1097-
END_LINE()
1098-
}
1099-
else {
1100-
self->state = SKIP_LINE;
1096+
END_LINE();
11011097
}
11021098
break;
11031099
}
@@ -1391,11 +1387,9 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
13911387
case START_RECORD:
13921388
// start of record
13931389
if (skip_this_line(self, self->file_lines)) {
1390+
self->state = SKIP_LINE;
13941391
if (c == '\n') {
1395-
END_LINE()
1396-
}
1397-
else {
1398-
self->state = SKIP_LINE;
1392+
END_LINE();
13991393
}
14001394
break;
14011395
} else if (c == '\n') {

0 commit comments

Comments
 (0)