Skip to content

Commit 2840bea

Browse files
committed
Merge pull request pandas-dev#10023 from jblackburne/read_csv-newline-chunk
read_csv newline fix
2 parents e686387 + e693c3a commit 2840bea

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

doc/source/whatsnew/v0.16.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -298,6 +298,7 @@ Bug Fixes
298298
- Bug where ``GroupBy.size`` doesn't attach the index name properly if grouped by ``TimeGrouper`` (:issue:`9925`)
299299
- Bug causing an exception in slice assignments because ``length_of_indexer`` returns wrong results (:issue:`9995`)
300300
- Bug in csv parser causing lines with initial whitespace plus one non-space character to be skipped. (:issue:`9710`)
301+
- Bug in C csv parser causing spurious NaNs when data started with newline followed by whitespace. (:issue:`10022`)
301302

302303
- Bug causing elements with a null group to spill into the final group when grouping by a ``Categorical`` (:issue:`9603`)
303304
- Bug where .iloc and .loc behavior is not consistent on empty dataframes (:issue:`9964`)

pandas/io/tests/test_parsers.py

+6
Original file line number | Diff line number | Diff line change
@@ -2287,6 +2287,12 @@ def test_single_char_leading_whitespace(self):
22872287
result = self.read_csv(StringIO(data), skipinitialspace=True)
22882288
tm.assert_frame_equal(result, expected)
22892289

2290+
def test_chunk_begins_with_newline_whitespace(self):
2291+
# GH 10022
2292+
data = '\n hello\nworld\n'
2293+
result = self.read_csv(StringIO(data), header=None)
2294+
self.assertEqual(len(result), 2)
2295+
22902296

22912297
class TestPythonParser(ParserTests, tm.TestCase):
22922298
def test_negative_skipfooter_raises(self):

pandas/src/parser/tokenizer.c

+2 -2
Original file line number | Diff line number | Diff line change
@@ -854,7 +854,7 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
854854
--i;
855855
} while (i + 1 > self->datapos && *buf != '\n');
856856

857-
if (i + 1 > self->datapos) // reached a newline rather than the beginning
857+
if (*buf == '\n') // reached a newline rather than the beginning
858858
{
859859
++buf; // move pointer to first char after newline
860860
++i;
@@ -1172,7 +1172,7 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit)
11721172
--i;
11731173
} while (i + 1 > self->datapos && *buf != self->lineterminator);
11741174

1175-
if (i + 1 > self->datapos) // reached a newline rather than the beginning
1175+
if (*buf == self->lineterminator) // reached a newline rather than the beginning
11761176
{
11771177
++buf; // move pointer to first char after newline
11781178
++i;

0 commit comments

Comments
 (0)