@@ -217,9 +217,7 @@ void parser_free(parser_t *self) {
217
217
parser_cleanup (self );
218
218
}
219
219
220
- void parser_del (parser_t * self ) {
221
- free (self );
222
- }
220
+ void parser_del (parser_t * self ) { free (self ); }
223
221
224
222
static int make_stream_space (parser_t * self , size_t nbytes ) {
225
223
uint64_t i , cap , length ;
@@ -278,9 +276,8 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
278
276
}
279
277
280
278
self -> words =
281
- (char * * )grow_buffer ((void * )self -> words , length ,
282
- & self -> words_cap , nbytes ,
283
- sizeof (char * ), & status );
279
+ (char * * )grow_buffer ((void * )self -> words , length , & self -> words_cap ,
280
+ nbytes , sizeof (char * ), & status );
284
281
TRACE (
285
282
("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, "
286
283
"%d)\n" ,
@@ -308,10 +305,9 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
308
305
LINE VECTORS
309
306
*/
310
307
cap = self -> lines_cap ;
311
- self -> line_start =
312
- (int64_t * )grow_buffer ((void * )self -> line_start , self -> lines + 1 ,
313
- & self -> lines_cap , nbytes ,
314
- sizeof (int64_t ), & status );
308
+ self -> line_start = (int64_t * )grow_buffer ((void * )self -> line_start ,
309
+ self -> lines + 1 , & self -> lines_cap ,
310
+ nbytes , sizeof (int64_t ), & status );
315
311
TRACE ((
316
312
"make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n" ,
317
313
self -> lines + 1 , self -> lines_cap , nbytes , status ))
@@ -445,7 +441,7 @@ static int end_line(parser_t *self) {
445
441
return 0 ;
446
442
}
447
443
448
- if (!(self -> lines <= self -> header_end + 1 ) &&
444
+ if (!(self -> lines <= self -> header_end + self -> allow_leading_cols ) &&
449
445
(self -> expected_fields < 0 && fields > ex_fields ) && !(self -> usecols )) {
450
446
// increment file line count
451
447
self -> file_lines ++ ;
@@ -460,8 +456,9 @@ static int end_line(parser_t *self) {
460
456
if (self -> error_bad_lines ) {
461
457
self -> error_msg = malloc (bufsize );
462
458
snprintf (self -> error_msg , bufsize ,
463
- "Expected %d fields in line %" PRIu64 ", saw %" PRId64 "\n" ,
464
- ex_fields , self -> file_lines , fields );
459
+ "Expected %d fields in line %" PRIu64 ", saw %" PRId64
460
+ "\n" ,
461
+ ex_fields , self -> file_lines , fields );
465
462
466
463
TRACE (("Error at line %d, %d fields\n" , self -> file_lines , fields ));
467
464
@@ -472,16 +469,16 @@ static int end_line(parser_t *self) {
472
469
// pass up error message
473
470
msg = malloc (bufsize );
474
471
snprintf (msg , bufsize ,
475
- "Skipping line %" PRIu64 ": expected %d fields, saw %"
476
- PRId64 "\n" , self -> file_lines , ex_fields , fields );
472
+ "Skipping line %" PRIu64
473
+ ": expected %d fields, saw %" PRId64 "\n" ,
474
+ self -> file_lines , ex_fields , fields );
477
475
append_warning (self , msg );
478
476
free (msg );
479
477
}
480
478
}
481
479
} else {
482
480
// missing trailing delimiters
483
- if ((self -> lines >= self -> header_end + 1 ) &&
484
- fields < ex_fields ) {
481
+ if ((self -> lines >= self -> header_end + 1 ) && fields < ex_fields ) {
485
482
// might overrun the buffer when closing fields
486
483
if (make_stream_space (self , ex_fields - fields ) < 0 ) {
487
484
int64_t bufsize = 100 ;
@@ -592,20 +589,20 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
592
589
593
590
*/
594
591
595
- #define PUSH_CHAR (c ) \
596
- TRACE( \
597
- ("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", \
598
- c, slen, self->stream_cap, self->stream_len)) \
599
- if (slen >= self->stream_cap) { \
600
- TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= stream_cap(%d)\n", slen, \
601
- self->stream_cap)) \
602
- int64_t bufsize = 100; \
603
- self->error_msg = malloc(bufsize); \
604
- snprintf(self->error_msg, bufsize, \
605
- "Buffer overflow caught - possible malformed input file.\n");\
606
- return PARSER_OUT_OF_MEMORY; \
607
- } \
608
- *stream++ = c; \
592
+ #define PUSH_CHAR (c ) \
593
+ TRACE( \
594
+ ("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", \
595
+ c, slen, self->stream_cap, self->stream_len)) \
596
+ if (slen >= self->stream_cap) { \
597
+ TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= stream_cap(%d)\n", slen, \
598
+ self->stream_cap)) \
599
+ int64_t bufsize = 100; \
600
+ self->error_msg = malloc(bufsize); \
601
+ snprintf(self->error_msg, bufsize, \
602
+ "Buffer overflow caught - possible malformed input file.\n"); \
603
+ return PARSER_OUT_OF_MEMORY; \
604
+ } \
605
+ *stream++ = c; \
609
606
slen++;
610
607
611
608
// This is a little bit of a hack but works for now
@@ -647,8 +644,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
647
644
648
645
#define END_LINE () END_LINE_STATE(START_RECORD)
649
646
650
- #define IS_TERMINATOR (c ) \
651
- (c == line_terminator)
647
+ #define IS_TERMINATOR (c ) (c == line_terminator)
652
648
653
649
#define IS_QUOTE (c ) ((c == self->quotechar && self->quoting != QUOTE_NONE))
654
650
@@ -708,25 +704,24 @@ int skip_this_line(parser_t *self, int64_t rownum) {
708
704
}
709
705
}
710
706
711
- int tokenize_bytes (parser_t * self ,
712
- size_t line_limit , uint64_t start_lines ) {
707
+ int tokenize_bytes (parser_t * self , size_t line_limit , uint64_t start_lines ) {
713
708
int64_t i ;
714
709
uint64_t slen ;
715
710
int should_skip ;
716
711
char c ;
717
712
char * stream ;
718
713
char * buf = self -> data + self -> datapos ;
719
714
720
- const char line_terminator = ( self -> lineterminator == '\0' ) ?
721
- '\n' : self -> lineterminator ;
715
+ const char line_terminator =
716
+ ( self -> lineterminator == '\0' ) ? '\n' : self -> lineterminator ;
722
717
723
718
// 1000 is something that couldn't fit in "char"
724
719
// thus comparing a char to it would always be "false"
725
720
const int carriage_symbol = (self -> lineterminator == '\0' ) ? '\r' : 1000 ;
726
- const int comment_symbol = ( self -> commentchar != '\0' ) ?
727
- self -> commentchar : 1000 ;
728
- const int escape_symbol = ( self -> escapechar != '\0' ) ?
729
- self -> escapechar : 1000 ;
721
+ const int comment_symbol =
722
+ ( self -> commentchar != '\0' ) ? self -> commentchar : 1000 ;
723
+ const int escape_symbol =
724
+ ( self -> escapechar != '\0' ) ? self -> escapechar : 1000 ;
730
725
731
726
if (make_stream_space (self , self -> datalen - self -> datapos ) < 0 ) {
732
727
int64_t bufsize = 100 ;
@@ -833,7 +828,7 @@ int tokenize_bytes(parser_t *self,
833
828
}
834
829
break ;
835
830
}
836
- // fall through
831
+ // fall through
837
832
838
833
case EAT_WHITESPACE :
839
834
if (IS_TERMINATOR (c )) {
@@ -1061,10 +1056,10 @@ int tokenize_bytes(parser_t *self,
1061
1056
} else {
1062
1057
if (self -> delim_whitespace ) {
1063
1058
/* XXX
1064
- * first character of a new record--need to back up and
1065
- * reread
1066
- * to handle properly...
1067
- */
1059
+ * first character of a new record--need to back up and
1060
+ * reread
1061
+ * to handle properly...
1062
+ */
1068
1063
i -- ;
1069
1064
buf -- ; // back up one character (HACK!)
1070
1065
END_LINE_STATE (START_RECORD );
@@ -1144,8 +1139,8 @@ static int parser_handle_eof(parser_t *self) {
1144
1139
case IN_QUOTED_FIELD :
1145
1140
self -> error_msg = (char * )malloc (bufsize );
1146
1141
snprintf (self -> error_msg , bufsize ,
1147
- "EOF inside string starting at row %" PRIu64 ,
1148
- self -> file_lines );
1142
+ "EOF inside string starting at row %" PRIu64 ,
1143
+ self -> file_lines );
1149
1144
return -1 ;
1150
1145
1151
1146
case ESCAPED_CHAR :
@@ -1267,8 +1262,8 @@ int parser_trim_buffers(parser_t *self) {
1267
1262
if (self -> words == NULL ) {
1268
1263
return PARSER_OUT_OF_MEMORY ;
1269
1264
}
1270
- self -> word_starts = realloc ( self -> word_starts ,
1271
- new_cap * sizeof (int64_t ));
1265
+ self -> word_starts =
1266
+ realloc ( self -> word_starts , new_cap * sizeof (int64_t ));
1272
1267
if (self -> word_starts == NULL ) {
1273
1268
return PARSER_OUT_OF_MEMORY ;
1274
1269
}
@@ -1311,15 +1306,13 @@ int parser_trim_buffers(parser_t *self) {
1311
1306
new_cap = _next_pow2 (self -> lines ) + 1 ;
1312
1307
if (new_cap < self -> lines_cap ) {
1313
1308
TRACE (("parser_trim_buffers: new_cap < self->lines_cap\n" ));
1314
- newptr = realloc (self -> line_start ,
1315
- new_cap * sizeof (int64_t ));
1309
+ newptr = realloc (self -> line_start , new_cap * sizeof (int64_t ));
1316
1310
if (newptr == NULL ) {
1317
1311
return PARSER_OUT_OF_MEMORY ;
1318
1312
} else {
1319
1313
self -> line_start = newptr ;
1320
1314
}
1321
- newptr = realloc (self -> line_fields ,
1322
- new_cap * sizeof (int64_t ));
1315
+ newptr = realloc (self -> line_fields , new_cap * sizeof (int64_t ));
1323
1316
if (newptr == NULL ) {
1324
1317
return PARSER_OUT_OF_MEMORY ;
1325
1318
} else {
@@ -1353,8 +1346,8 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all,
1353
1346
if (!all && self -> lines - start_lines >= nrows ) break ;
1354
1347
1355
1348
if (self -> datapos == self -> datalen ) {
1356
- status = parser_buffer_bytes ( self , self -> chunksize ,
1357
- encoding_errors );
1349
+ status =
1350
+ parser_buffer_bytes ( self , self -> chunksize , encoding_errors );
1358
1351
1359
1352
if (status == REACHED_EOF ) {
1360
1353
// close out last line
@@ -1413,11 +1406,11 @@ int tokenize_all_rows(parser_t *self, const char *encoding_errors) {
1413
1406
*/
1414
1407
int to_boolean (const char * item , uint8_t * val ) {
1415
1408
if (strcasecmp (item , "TRUE" ) == 0 ) {
1416
- * val = 1 ;
1417
- return 0 ;
1409
+ * val = 1 ;
1410
+ return 0 ;
1418
1411
} else if (strcasecmp (item , "FALSE" ) == 0 ) {
1419
- * val = 0 ;
1420
- return 0 ;
1412
+ * val = 0 ;
1413
+ return 0 ;
1421
1414
}
1422
1415
1423
1416
return -1 ;
@@ -1611,9 +1604,9 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
1611
1604
return number ;
1612
1605
}
1613
1606
1614
- double precise_xstrtod (const char * str , char * * endptr , char decimal ,
1615
- char sci , char tsep , int skip_trailing ,
1616
- int * error , int * maybe_int ) {
1607
+ double precise_xstrtod (const char * str , char * * endptr , char decimal , char sci ,
1608
+ char tsep , int skip_trailing , int * error ,
1609
+ int * maybe_int ) {
1617
1610
double number ;
1618
1611
int exponent ;
1619
1612
int negative ;
@@ -1751,7 +1744,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
1751
1744
} else if (exponent > 0 ) {
1752
1745
number *= e [exponent ];
1753
1746
} else if (exponent < -308 ) { // Subnormal
1754
- if (exponent < -616 ) { // Prevent invalid array access.
1747
+ if (exponent < -616 ) { // Prevent invalid array access.
1755
1748
number = 0. ;
1756
1749
} else {
1757
1750
number /= e [-308 - exponent ];
@@ -1779,7 +1772,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
1779
1772
with a call to `free`.
1780
1773
*/
1781
1774
1782
- char * _str_copy_decimal_str_c (const char * s , char * * endpos , char decimal ,
1775
+ char * _str_copy_decimal_str_c (const char * s , char * * endpos , char decimal ,
1783
1776
char tsep ) {
1784
1777
const char * p = s ;
1785
1778
size_t length = strlen (s );
@@ -1796,17 +1789,15 @@ char* _str_copy_decimal_str_c(const char *s, char **endpos, char decimal,
1796
1789
}
1797
1790
// Replace `decimal` with '.'
1798
1791
if (* p == decimal ) {
1799
- * dst ++ = '.' ;
1800
- p ++ ;
1792
+ * dst ++ = '.' ;
1793
+ p ++ ;
1801
1794
}
1802
1795
// Copy the remainder of the string as is.
1803
1796
strncpy (dst , p , length + 1 - (p - s ));
1804
- if (endpos != NULL )
1805
- * endpos = (char * )(s + length );
1797
+ if (endpos != NULL ) * endpos = (char * )(s + length );
1806
1798
return s_copy ;
1807
1799
}
1808
1800
1809
-
1810
1801
double round_trip (const char * p , char * * q , char decimal , char sci , char tsep ,
1811
1802
int skip_trailing , int * error , int * maybe_int ) {
1812
1803
// 'normalize' representation to C-locale; replace decimal with '.' and
@@ -1822,20 +1813,22 @@ double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
1822
1813
// PyOS_string_to_double needs to consume the whole string
1823
1814
if (endpc == pc + strlen (pc )) {
1824
1815
if (q != NULL ) {
1825
- // report endptr from source string (p)
1816
+ // report endptr from source string (p)
1826
1817
* q = endptr ;
1827
1818
}
1828
1819
} else {
1829
1820
* error = -1 ;
1830
1821
if (q != NULL ) {
1831
- // p and pc are different len due to tsep removal. Can't report
1832
- // how much it has consumed of p. Just rewind to beginning.
1833
- * q = (char * )p ; // TODO(willayd): this could be undefined behavior
1822
+ // p and pc are different len due to tsep removal. Can't report
1823
+ // how much it has consumed of p. Just rewind to beginning.
1824
+ * q = (char * )p ; // TODO(willayd): this could be undefined behavior
1834
1825
}
1835
1826
}
1836
1827
if (maybe_int != NULL ) * maybe_int = 0 ;
1837
- if (PyErr_Occurred () != NULL ) * error = -1 ;
1838
- else if (r == Py_HUGE_VAL ) * error = (int )Py_HUGE_VAL ;
1828
+ if (PyErr_Occurred () != NULL )
1829
+ * error = -1 ;
1830
+ else if (r == Py_HUGE_VAL )
1831
+ * error = (int )Py_HUGE_VAL ;
1839
1832
PyErr_Clear ();
1840
1833
1841
1834
PyGILState_Release (gstate );
0 commit comments