@@ -156,6 +156,7 @@ void parser_set_default_options(parser_t *self) {
     self->thousands = '\0';
 
     self->skipset = NULL;
+    self->skip_first_N_rows = -1;
     self->skip_footer = 0;
 }
 
@@ -444,21 +445,17 @@ static int end_line(parser_t *self) {
         }
     }
 
-    if (self->skipset != NULL) {
-        k = kh_get_int64((kh_int64_t*) self->skipset, self->file_lines);
-
-        if (k != ((kh_int64_t*)self->skipset)->n_buckets) {
-            TRACE(("Skipping row %d\n", self->file_lines));
-            // increment file line count
-            self->file_lines++;
-
-            // skip the tokens from this bad line
-            self->line_start[self->lines] += fields;
+    if (self->state == SKIP_LINE) {
+        TRACE(("Skipping row %d\n", self->file_lines));
+        // increment file line count
+        self->file_lines++;
+
+        // skip the tokens from this bad line
+        self->line_start[self->lines] += fields;
 
-            // reset field count
-            self->line_fields[self->lines] = 0;
-            return 0;
-        }
+        // reset field count
+        self->line_fields[self->lines] = 0;
+        return 0;
     }
 
     /* printf("Line: %d, Fields: %d, Ex-fields: %d\n", self->lines, fields, ex_fields); */
@@ -556,6 +553,15 @@ int parser_add_skiprow(parser_t *self, int64_t row) {
     return 0;
 }
 
+int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) {
+    // self->file_lines is zero based so subtract 1 from nrows
+    if (nrows > 0) {
+        self->skip_first_N_rows = nrows - 1;
+    }
+
+    return 0;
+}
+
 static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
     int status;
     size_t bytes_read;
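A note on the conversion above: parser_set_skipfirstnrows() stores nrows - 1 because self->file_lines counts rows from zero, and the default of -1 set in parser_set_default_options() then means "skip nothing", since no zero-based row number is <= -1. The standalone sketch below is not parser code, just a model of that arithmetic combined with the rownum <= skip_first_N_rows test introduced further down in skip_this_line():

#include <stdio.h>
#include <stdint.h>

int main(void) {
    int64_t skip_first_N_rows = -1;     /* the default from parser_set_default_options() */
    int64_t nrows = 3;                  /* caller asks to skip the first 3 rows */

    if (nrows > 0) {
        skip_first_N_rows = nrows - 1;  /* file_lines is zero based, hence the - 1 */
    }

    for (int64_t rownum = 0; rownum < 5; rownum++) {
        /* the same comparison the skip_first_N_rows branch of skip_this_line() makes */
        printf("row %lld: %s\n", (long long)rownum,
               rownum <= skip_first_N_rows ? "skip" : "parse");
    }
    return 0;
}

Run as-is, this prints "skip" for rows 0, 1 and 2 and "parse" for rows 3 and 4.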
@@ -656,6 +662,15 @@ typedef int (*parser_op)(parser_t *self, size_t line_limit);
     TRACE(("datapos: %d, datalen: %d\n", self->datapos, self->datalen));
 
 
+int skip_this_line(parser_t *self, int64_t rownum) {
+    if (self->skipset != NULL) {
+        return ( kh_get_int64((kh_int64_t*) self->skipset, self->file_lines) !=
+                 ((kh_int64_t*)self->skipset)->n_buckets );
+    }
+    else {
+        return ( rownum <= self->skip_first_N_rows );
+    }
+}
 
 int tokenize_delimited(parser_t *self, size_t line_limit)
 {
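The skipset branch of skip_this_line() relies on khash's convention that kh_get_int64() returns the table's n_buckets value, i.e. kh_end(), when a key is absent. Below is a minimal, self-contained sketch of that membership test written directly against klib's khash.h rather than the parser's own wrappers; the set name "rows" and the sample keys are illustrative, not taken from this diff:

#include <stdio.h>
#include <stdint.h>
#include "khash.h"

KHASH_SET_INIT_INT64(rows)              /* declares khash_t(rows), kh_get(rows, ...), ... */

int main(void) {
    int ret;
    khash_t(rows) *skipset = kh_init(rows);

    /* mark rows 0 and 4 for skipping, as parser_add_skiprow() does for the real skipset */
    kh_put(rows, skipset, 0, &ret);
    kh_put(rows, skipset, 4, &ret);

    for (int64_t line = 0; line < 6; line++) {
        khiter_t k = kh_get(rows, skipset, line);
        /* kh_end(skipset) expands to skipset->n_buckets: the "not found" value
         * that the skipset branch of skip_this_line() compares against */
        printf("line %lld: %s\n", (long long)line,
               k != kh_end(skipset) ? "skip" : "keep");
    }

    kh_destroy(rows, skipset);
    return 0;
}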
@@ -688,10 +703,25 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
 
         switch (self->state) {
 
+        case SKIP_LINE:
+            // TRACE(("tokenize_delimited SKIP_LINE %c, state %d\n", c, self->state));
+            if (c == '\n') {
+                END_LINE();
+            }
+            break;
+
         case START_RECORD:
             // start of record
-
-            if (c == '\n') {
+            if (skip_this_line(self, self->file_lines)) {
+                if (c == '\n') {
+                    END_LINE()
+                }
+                else {
+                    self->state = SKIP_LINE;
+                }
+                break;
+            }
+            else if (c == '\n') {
                 // \n\r possible?
                 if (self->skip_empty_lines)
                 {
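How the new SKIP_LINE state plays out in tokenize_delimited(), and with the terminator swapped in the two tokenizers below: when START_RECORD sees a row flagged by skip_this_line(), it either finishes the (empty) row right away if the character is already the terminator, or moves to SKIP_LINE, which swallows characters until the terminator; end_line()'s new SKIP_LINE branch then discards whatever was buffered for that row. The following is a toy, self-contained model of that two-state flow, not the pandas tokenizer; it simply echoes the rows it keeps:

#include <stdio.h>
#include <string.h>

enum state { START_RECORD, SKIP_LINE, IN_FIELD };

int main(void) {
    const char *data = "skip me\nkeep me\n";
    int skip_first_N_rows = 0;          /* skip only row 0 */
    int file_lines = 0;                 /* zero-based row counter */
    enum state st = START_RECORD;

    for (size_t i = 0; i < strlen(data); i++) {
        char c = data[i];
        switch (st) {
        case SKIP_LINE:
            /* swallow characters until the line terminator */
            if (c == '\n') {
                file_lines++;           /* row counted, its content discarded */
                st = START_RECORD;
            }
            break;
        case START_RECORD:
            if (file_lines <= skip_first_N_rows) {   /* stand-in for skip_this_line() */
                if (c == '\n') {
                    file_lines++;       /* the skipped row was empty */
                } else {
                    st = SKIP_LINE;
                }
            } else if (c == '\n') {
                file_lines++;           /* empty row, kept */
            } else {
                putchar(c);             /* "tokenize" by echoing the field data */
                st = IN_FIELD;
            }
            break;
        case IN_FIELD:
            if (c == '\n') {
                printf("   <- kept row %d\n", file_lines);
                file_lines++;
                st = START_RECORD;
            } else {
                putchar(c);
            }
            break;
        }
    }
    return 0;
}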
@@ -1006,9 +1036,26 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit)
                self->state));
 
         switch (self->state) {
+
+        case SKIP_LINE:
+            // TRACE(("tokenize_delim_customterm SKIP_LINE %c, state %d\n", c, self->state));
+            if (c == self->lineterminator) {
+                END_LINE();
+            }
+            break;
+
         case START_RECORD:
             // start of record
-            if (c == self->lineterminator) {
+            if (skip_this_line(self, self->file_lines)) {
+                if (c == self->lineterminator) {
+                    END_LINE()
+                }
+                else {
+                    self->state = SKIP_LINE;
+                }
+                break;
+            }
+            else if (c == self->lineterminator) {
                 // \n\r possible?
                 if (self->skip_empty_lines)
                 {
@@ -1252,6 +1299,14 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
                self->state));
 
         switch (self->state) {
+
+        case SKIP_LINE:
+            // TRACE(("tokenize_whitespace SKIP_LINE %c, state %d\n", c, self->state));
+            if (c == '\n') {
+                END_LINE();
+            }
+            break;
+
         case WHITESPACE_LINE:
             if (c == '\n') {
                 self->file_lines++;
@@ -1283,9 +1338,17 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
 
         case START_RECORD:
             // start of record
-            if (c == '\n') {
-                // \n\r possible?
+            if (skip_this_line(self, self->file_lines)) {
+                if (c == '\n') {
+                    END_LINE()
+                }
+                else {
+                    self->state = SKIP_LINE;
+                }
+                break;
+            } else if (c == '\n') {
                 if (self->skip_empty_lines)
+                // \n\r possible?
                 {
                     self->file_lines++;
                 }