@@ -55,6 +55,8 @@ def next(x):
55
55
DD/MM format dates, international and European format
56
56
thousands : str, default None
57
57
Thousands separator
58
+ comment : str, default None
59
+ Indicates remainder of line should not be parsed
58
60
nrows : int, default None
59
61
Number of rows of file to read. Useful for reading pieces of large files
60
62
iterator : boolean, default False
@@ -179,6 +181,7 @@ def read_csv(filepath_or_buffer,
179
181
skiprows = None ,
180
182
na_values = None ,
181
183
thousands = None ,
184
+ comment = None ,
182
185
parse_dates = False ,
183
186
dayfirst = False ,
184
187
date_parser = None ,
@@ -208,6 +211,7 @@ def read_table(filepath_or_buffer,
208
211
skiprows = None ,
209
212
na_values = None ,
210
213
thousands = None ,
214
+ comment = None ,
211
215
parse_dates = False ,
212
216
dayfirst = False ,
213
217
date_parser = None ,
@@ -241,6 +245,7 @@ def read_fwf(filepath_or_buffer,
241
245
skiprows = None ,
242
246
na_values = None ,
243
247
thousands = None ,
248
+ comment = None ,
244
249
parse_dates = False ,
245
250
dayfirst = False ,
246
251
date_parser = None ,
@@ -339,6 +344,10 @@ class TextParser(object):
339
344
Column or columns to use as the (possibly hierarchical) index
340
345
na_values : iterable, default None
341
346
Custom NA values
347
+ thousands : str, default None
348
+ Thousands separator
349
+ comment : str, default None
350
+ Indicates remainder of line should not be parsed
342
351
parse_dates : boolean, default False
343
352
date_parser : function, default None
344
353
skiprows : list of integers
@@ -351,7 +360,7 @@ class TextParser(object):
351
360
352
361
def __init__ (self , f , delimiter = None , names = None , header = 0 ,
353
362
index_col = None , na_values = None , thousands = None ,
354
- parse_dates = False ,
363
+ comment = None , parse_dates = False ,
355
364
date_parser = None , dayfirst = False , chunksize = None ,
356
365
skiprows = None , skip_footer = 0 , converters = None ,
357
366
verbose = False , encoding = None ):
@@ -398,6 +407,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
398
407
self .na_values = set (list (na_values )) | _NA_VALUES
399
408
400
409
self .thousands = thousands
410
+ self .comment = comment
401
411
402
412
if hasattr (f , 'readline' ):
403
413
self ._make_reader (f )
@@ -430,6 +440,12 @@ def _make_reader(self, f):
430
440
self .pos += 1
431
441
line = f .readline ()
432
442
443
+ while self ._is_commented (line ):
444
+ self .pos += 1
445
+ line = f .readline ()
446
+
447
+ line = self ._check_comments ([line ])[0 ]
448
+
433
449
self .pos += 1
434
450
sniffed = csv .Sniffer ().sniff (line )
435
451
dia .delimiter = sniffed .delimiter
@@ -498,22 +514,56 @@ def _next_line(self):
498
514
self .pos += 1
499
515
500
516
try :
501
- line = self .data [self .pos ]
517
+ while True :
518
+ line = self .data [self .pos ]
519
+ if not self ._is_commented (line ):
520
+ break
521
+ self .pos += 1
502
522
except IndexError :
503
523
raise StopIteration
504
524
else :
505
525
while self .pos in self .skiprows :
506
526
next (self .data )
507
527
self .pos += 1
508
- line = next (self .data )
509
528
529
+ while True :
530
+ line = next (self .data )
531
+ if not self ._is_commented (line ):
532
+ break
533
+ self .pos += 1
534
+
535
+ line = self ._check_comments ([line ])[0 ]
510
536
line = self ._check_thousands ([line ])[0 ]
511
537
512
538
self .pos += 1
513
539
self .buf .append (line )
514
540
515
541
return line
516
542
543
+ def _is_commented (self , line ):
544
+ if self .comment is None or len (line ) == 0 :
545
+ return False
546
+ return line [0 ].startswith (self .comment )
547
+
548
+ def _check_comments (self , lines ):
549
+ if self .comment is None :
550
+ return lines
551
+ ret = []
552
+ for l in lines :
553
+ rl = []
554
+ for x in l :
555
+ if (not isinstance (x , basestring ) or
556
+ self .comment not in x ):
557
+ rl .append (x )
558
+ else :
559
+ x = x [:x .find (self .comment )]
560
+ if len (x ) > 0 :
561
+ rl .append (x )
562
+ break
563
+ if len (rl ) > 0 :
564
+ ret .append (rl )
565
+ return ret
566
+
517
567
def _check_thousands (self , lines ):
518
568
if self .thousands is None :
519
569
return lines
@@ -730,6 +780,7 @@ def _get_lines(self, rows=None):
730
780
if self .skip_footer :
731
781
lines = lines [:- self .skip_footer ]
732
782
783
+ lines = self ._check_comments (lines )
733
784
return self ._check_thousands (lines )
734
785
735
786
def _convert_to_ndarrays (dct , na_values , verbose = False ):
0 commit comments