Skip to content

Commit 38c0cce

Browse files
committed
BUG: Fixed incorrect stream size check (#14125)
1 parent a62fdf8 commit 38c0cce

File tree

2 files changed

+15
-5
lines changed

2 files changed

+15
-5
lines changed

pandas/io/tests/parser/c_parser_only.py

+12
Original file line number | Diff line number | Diff line change
@@ -395,3 +395,15 @@ def test_float_precision_round_trip_with_text(self):
395395
float_precision='round_trip',
396396
header=None)
397397
tm.assert_frame_equal(df, DataFrame({0: ['a']}))
398+
399+
def test_large_difference_in_columns(self):
400+
# gh-14125
401+
count = 10000
402+
large_row = ('X,' * count)[:-1] + '\n'
403+
normal_row = 'XXXXXX XXXXXX,111111111111111\n'
404+
test_input = (large_row + normal_row * 6)[:-1]
405+
result = self.read_csv(StringIO(test_input), header=None, usecols=[0])
406+
rows = test_input.split('\n')
407+
expected = DataFrame([row.split(',')[0] for row in rows])
408+
409+
tm.assert_frame_equal(result, expected)

pandas/src/parser/tokenizer.c

+3 -5
Original file line number | Diff line number | Diff line change
@@ -592,9 +592,9 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
592592
TRACE( \
593593
("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", \
594594
c, slen, self->stream_cap, self->stream_len)) \
595-
if (slen >= maxstreamsize) { \
596-
TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= maxstreamsize(%d)\n", slen, \
597-
maxstreamsize)) \
595+
if (slen >= self->stream_cap) { \
596+
TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= stream_cap(%d)\n", slen, \
597+
self->stream_cap)) \
598598
int bufsize = 100; \
599599
self->error_msg = (char *)malloc(bufsize); \
600600
snprintf(self->error_msg, bufsize, \
@@ -711,7 +711,6 @@ int skip_this_line(parser_t *self, int64_t rownum) {
711711
int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) {
712712
int i, slen;
713713
int should_skip;
714-
long maxstreamsize;
715714
char c;
716715
char *stream;
717716
char *buf = self->data + self->datapos;
@@ -723,7 +722,6 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) {
723722

724723
stream = self->stream + self->stream_len;
725724
slen = self->stream_len;
726-
maxstreamsize = self->stream_cap;
727725

728726
TRACE(("%s\n", buf));
729727

0 commit comments

Comments
 (0)