@@ -64,9 +64,11 @@ class ParserWarning(Warning):
64
64
pass ``header=0`` to be able to replace existing names. The header can be
65
65
a list of integers that specify row locations for a multi-index on the
66
66
columns, e.g. [0,1,3]. Intervening rows that are not specified will be
67
- skipped. (E.g. 2 in this example are skipped)
67
+ skipped (e.g. row 2 in this example is skipped). Note that this parameter
68
+ ignores commented lines, so header=0 denotes the first line of
69
+ data rather than the first line of the file.
68
70
skiprows : list-like or integer
69
- Row numbers to skip (0-indexed) or number of rows to skip (int)
71
+ Line numbers to skip (0-indexed) or number of lines to skip (int)
70
72
at the start of the file
71
73
index_col : int or sequence or False, default None
72
74
Column to use as the row labels of the DataFrame. If a sequence is given, a
@@ -106,8 +108,12 @@ class ParserWarning(Warning):
106
108
thousands : str, default None
107
109
Thousands separator
108
110
comment : str, default None
109
- Indicates remainder of line should not be parsed
110
- Does not support line commenting (will return empty line)
111
+ Indicates remainder of line should not be parsed. If found at the
112
+ beginning of a line, the line will be ignored altogether. This parameter
113
+ must be a single character. Also, fully commented lines
114
+ are ignored by the parameter `header` but not by `skiprows`. For example,
115
+ if comment='#', parsing '#empty\n1,2,3\na,b,c' with `header=0` will
116
+ result in '1,2,3' being treated as the header.
111
117
decimal : str, default '.'
112
118
Character to recognize as decimal point. E.g. use ',' for European data
113
119
nrows : int, default None
@@ -1313,6 +1319,7 @@ def __init__(self, f, **kwds):
1313
1319
self .data = None
1314
1320
self .buf = []
1315
1321
self .pos = 0
1322
+ self .line_pos = 0
1316
1323
1317
1324
self .encoding = kwds ['encoding' ]
1318
1325
self .compression = kwds ['compression' ]
@@ -1459,6 +1466,7 @@ class MyDialect(csv.Dialect):
1459
1466
line = self ._check_comments ([line ])[0 ]
1460
1467
1461
1468
self .pos += 1
1469
+ self .line_pos += 1
1462
1470
sniffed = csv .Sniffer ().sniff (line )
1463
1471
dia .delimiter = sniffed .delimiter
1464
1472
if self .encoding is not None :
@@ -1566,7 +1574,7 @@ def _infer_columns(self):
1566
1574
if self .header is not None :
1567
1575
header = self .header
1568
1576
1569
- # we have a mi columns, so read and extra line
1577
+ # we have multi-index columns, so read an extra line
1570
1578
if isinstance (header , (list , tuple , np .ndarray )):
1571
1579
have_mi_columns = True
1572
1580
header = list (header ) + [header [- 1 ] + 1 ]
@@ -1578,9 +1586,8 @@ def _infer_columns(self):
1578
1586
for level , hr in enumerate (header ):
1579
1587
line = self ._buffered_line ()
1580
1588
1581
- while self .pos <= hr :
1589
+ while self .line_pos <= hr :
1582
1590
line = self ._next_line ()
1583
-
1584
1591
unnamed_count = 0
1585
1592
this_columns = []
1586
1593
for i , c in enumerate (line ):
@@ -1705,25 +1712,36 @@ def _buffered_line(self):
1705
1712
else :
1706
1713
return self ._next_line ()
1707
1714
1715
+ def _empty (self , line ):
1716
+ return not line or all (not x for x in line )
1717
+
1708
1718
def _next_line (self ):
1709
1719
if isinstance (self .data , list ):
1710
1720
while self .pos in self .skiprows :
1711
1721
self .pos += 1
1712
1722
1713
- try :
1714
- line = self .data [self .pos ]
1715
- except IndexError :
1716
- raise StopIteration
1723
+ while True :
1724
+ try :
1725
+ line = self ._check_comments ([self .data [self .pos ]])[0 ]
1726
+ self .pos += 1
1727
+ # either uncommented or blank to begin with
1728
+ if self ._empty (self .data [self .pos - 1 ]) or line :
1729
+ break
1730
+ except IndexError :
1731
+ raise StopIteration
1717
1732
else :
1718
1733
while self .pos in self .skiprows :
1719
1734
next (self .data )
1720
1735
self .pos += 1
1721
1736
1722
- line = next (self .data )
1723
-
1724
- line = self ._check_comments ([line ])[0 ]
1737
+ while True :
1738
+ orig_line = next (self .data )
1739
+ line = self ._check_comments ([orig_line ])[0 ]
1740
+ self .pos += 1
1741
+ if self ._empty (orig_line ) or line :
1742
+ break
1725
1743
1726
- self .pos += 1
1744
+ self .line_pos += 1
1727
1745
self .buf .append (line )
1728
1746
1729
1747
return line
0 commit comments