@@ -501,7 +501,6 @@ def _read(filepath_or_buffer, kwds):
501
501
502
502
_fwf_defaults = {
503
503
'colspecs' : 'infer' ,
504
- 'infer_nrows' : 100 ,
505
504
'widths' : None ,
506
505
}
507
506
@@ -719,8 +718,8 @@ def parser_f(filepath_or_buffer,
719
718
)(read_table )
720
719
721
720
722
- def read_fwf (filepath_or_buffer , colspecs = 'infer' , widths = None ,
723
- infer_nrows = 100 , ** kwds ):
721
+ def read_fwf (filepath_or_buffer , colspecs = 'infer' ,
722
+ widths = None , ** kwds ):
724
723
725
724
r"""
726
725
Read a table of fixed-width formatted lines into DataFrame.
@@ -753,11 +752,6 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
753
752
widths : list of int, optional
754
753
A list of field widths which can be used instead of 'colspecs' if
755
754
the intervals are contiguous.
756
- infer_nrows : int, default 100
757
- The number of rows to consider when letting the parser determine the
758
- `colspecs`.
759
-
760
- .. versionadded:: 0.24.0
761
755
**kwds : optional
762
756
Optional keyword arguments can be passed to ``TextFileReader``.
763
757
@@ -792,7 +786,6 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
792
786
col += w
793
787
794
788
kwds ['colspecs' ] = colspecs
795
- kwds ['infer_nrows' ] = infer_nrows
796
789
kwds ['engine' ] = 'python-fwf'
797
790
return _read (filepath_or_buffer , kwds )
798
791
@@ -1752,8 +1745,8 @@ def _cast_types(self, values, cast_type, column):
1752
1745
1753
1746
cats = Index (values ).unique ().dropna ()
1754
1747
values = Categorical ._from_inferred_categories (
1755
- cats , cats .get_indexer (values ), cast_type ,
1756
- true_values = self . true_values )
1748
+ cats , cats .get_indexer (values ), cast_type
1749
+ )
1757
1750
1758
1751
else :
1759
1752
try :
@@ -3449,15 +3442,13 @@ class FixedWidthReader(BaseIterator):
3449
3442
A reader of fixed-width lines.
3450
3443
"""
3451
3444
3452
- def __init__ (self , f , colspecs , delimiter , comment , skiprows = None ,
3453
- infer_nrows = 100 ):
3445
+ def __init__ (self , f , colspecs , delimiter , comment , skiprows = None ):
3454
3446
self .f = f
3455
3447
self .buffer = None
3456
3448
self .delimiter = '\r \n ' + delimiter if delimiter else '\n \r \t '
3457
3449
self .comment = comment
3458
3450
if colspecs == 'infer' :
3459
- self .colspecs = self .detect_colspecs (infer_nrows = infer_nrows ,
3460
- skiprows = skiprows )
3451
+ self .colspecs = self .detect_colspecs (skiprows = skiprows )
3461
3452
else :
3462
3453
self .colspecs = colspecs
3463
3454
@@ -3473,20 +3464,19 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None,
3473
3464
raise TypeError ('Each column specification must be '
3474
3465
'2 element tuple or list of integers' )
3475
3466
3476
- def get_rows (self , infer_nrows , skiprows = None ):
3467
+ def get_rows (self , n , skiprows = None ):
3477
3468
"""
3478
3469
Read rows from self.f, skipping as specified.
3479
3470
3480
- We distinguish buffer_rows (the first <= infer_nrows
3481
- lines) from the rows returned to detect_colspecs
3482
- because it's simpler to leave the other locations
3483
- with skiprows logic alone than to modify them to
3484
- deal with the fact we skipped some rows here as
3485
- well.
3471
+ We distinguish buffer_rows (the first <= n lines)
3472
+ from the rows returned to detect_colspecs because
3473
+ it's simpler to leave the other locations with
3474
+ skiprows logic alone than to modify them to deal
3475
+ with the fact we skipped some rows here as well.
3486
3476
3487
3477
Parameters
3488
3478
----------
3489
- infer_nrows : int
3479
+ n : int
3490
3480
Number of rows to read from self.f, not counting
3491
3481
rows that are skipped.
3492
3482
skiprows : set, optional
@@ -3506,16 +3496,16 @@ def get_rows(self, infer_nrows, skiprows=None):
3506
3496
if i not in skiprows :
3507
3497
detect_rows .append (row )
3508
3498
buffer_rows .append (row )
3509
- if len (detect_rows ) >= infer_nrows :
3499
+ if len (detect_rows ) >= n :
3510
3500
break
3511
3501
self .buffer = iter (buffer_rows )
3512
3502
return detect_rows
3513
3503
3514
- def detect_colspecs (self , infer_nrows = 100 , skiprows = None ):
3504
+ def detect_colspecs (self , n = 100 , skiprows = None ):
3515
3505
# Regex escape the delimiters
3516
3506
delimiters = '' .join (r'\%s' % x for x in self .delimiter )
3517
3507
pattern = re .compile ('([^%s]+)' % delimiters )
3518
- rows = self .get_rows (infer_nrows , skiprows )
3508
+ rows = self .get_rows (n , skiprows )
3519
3509
if not rows :
3520
3510
raise EmptyDataError ("No rows from which to infer column width" )
3521
3511
max_len = max (map (len , rows ))
@@ -3554,10 +3544,8 @@ class FixedWidthFieldParser(PythonParser):
3554
3544
def __init__ (self , f , ** kwds ):
3555
3545
# Support iterators, convert to a list.
3556
3546
self .colspecs = kwds .pop ('colspecs' )
3557
- self .infer_nrows = kwds .pop ('infer_nrows' )
3558
3547
PythonParser .__init__ (self , f , ** kwds )
3559
3548
3560
3549
def _make_reader (self , f ):
3561
3550
self .data = FixedWidthReader (f , self .colspecs , self .delimiter ,
3562
- self .comment , self .skiprows ,
3563
- self .infer_nrows )
3551
+ self .comment , self .skiprows )
0 commit comments