Skip to content

Commit 7218867

Browse files
committed
Merge branch 'trailing_spaces_fix' of https://github.com/selasley/pandas into selasley-trailing_spaces_fix
Conflicts: doc/source/whatsnew/v0.16.0.txt
2 parents 6d635fc + e020829 commit 7218867

File tree

3 files changed

+31
-0
lines changed

3 files changed

+31
-0
lines changed

doc/source/whatsnew/v0.16.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ Bug Fixes
109109

110110

111111
- DataFrame now properly supports simultaneous ``copy`` and ``dtype`` arguments in constructor (:issue:`9099`)
112+
- Bug in ``read_csv`` when using ``skiprows`` on a file with CR line endings with the C engine (:issue:`9079`)

pandas/io/tests/test_parsers.py

+24
Original file line numberDiff line numberDiff line change
@@ -3048,6 +3048,30 @@ def test_comment_skiprows(self):
30483048
df = self.read_csv(StringIO(data), comment='#', skiprows=4)
30493049
tm.assert_almost_equal(df.values, expected)
30503050

3051+
def test_skiprows_lineterminator(self):
    # GH 9079: skiprows must drop exactly the requested number of lines
    # whichever of the default line terminators (LF, CR, CRLF) the file
    # uses.
    names = ['date', 'time', 'var', 'flag', 'oflag']

    # The first line is a free-form header that skiprows=1 should discard;
    # every line deliberately ends with trailing whitespace.
    data = '\n'.join(['SMOSMANIA ThetaProbe-ML2X ',
                      '2007/01/01 01:00 0.2140 U M ',
                      '2007/01/01 02:00 0.2141 M O ',
                      '2007/01/01 04:00 0.2142 D M '])
    expected = pd.DataFrame([['2007/01/01', '01:00', 0.2140, 'U', 'M'],
                             ['2007/01/01', '02:00', 0.2141, 'M', 'O'],
                             ['2007/01/01', '04:00', 0.2142, 'D', 'M']],
                            columns=names)

    # Same payload, re-encoded with each default line terminator in turn.
    # Replacing '\n' with '\n' leaves the LF case unchanged.
    for lineterminator in ['\n', '\r', '\r\n']:
        df = self.read_csv(StringIO(data.replace('\n', lineterminator)),
                           skiprows=1, delim_whitespace=True, names=names)
        tm.assert_frame_equal(df, expected)
3074+
30513075
def test_trailing_spaces(self):
30523076
data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n"
30533077
expected = pd.DataFrame([[1., 2., 4.],

pandas/src/parser/tokenizer.c

+6
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,9 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
707707
// TRACE(("tokenize_delimited SKIP_LINE %c, state %d\n", c, self->state));
708708
if (c == '\n') {
709709
END_LINE();
710+
} else if (c == '\r') {
711+
self->file_lines++;
712+
self->state = EAT_CRNL_NOP;
710713
}
711714
break;
712715

@@ -1304,6 +1307,9 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
13041307
// TRACE(("tokenize_whitespace SKIP_LINE %c, state %d\n", c, self->state));
13051308
if (c == '\n') {
13061309
END_LINE();
1310+
} else if (c == '\r') {
1311+
self->file_lines++;
1312+
self->state = EAT_CRNL_NOP;
13071313
}
13081314
break;
13091315

0 commit comments

Comments
 (0)