39
39
na_values : list-like or dict, default None
40
40
Additional strings to recognize as NA/NaN. If dict passed, specific
41
41
per-column NA values
42
- parse_dates : boolean, default False
43
- Attempt to parse dates in the index column(s)
42
+ parse_dates : boolean or list of column numbers/names, default False
43
+ If True, attempt to parse dates in the index column(s); if a list, attempt to parse dates in the indicated columns
44
44
date_parser : function
45
45
Function to use for converting strings to dates. Defaults to
46
46
dateutil.parser
47
+ dayfirst : boolean, default False
48
+ DD/MM format dates, international and European format
47
49
nrows : int, default None
48
50
Number of rows of file to read. Useful for reading pieces of large files
49
51
iterator : boolean, default False
@@ -168,6 +170,7 @@ def read_csv(filepath_or_buffer,
168
170
skiprows = None ,
169
171
na_values = None ,
170
172
parse_dates = False ,
173
+ dayfirst = False ,
171
174
date_parser = None ,
172
175
nrows = None ,
173
176
iterator = False ,
@@ -195,6 +198,7 @@ def read_table(filepath_or_buffer,
195
198
skiprows = None ,
196
199
na_values = None ,
197
200
parse_dates = False ,
201
+ dayfirst = False ,
198
202
date_parser = None ,
199
203
nrows = None ,
200
204
iterator = False ,
@@ -226,6 +230,7 @@ def read_fwf(filepath_or_buffer,
226
230
skiprows = None ,
227
231
na_values = None ,
228
232
parse_dates = False ,
233
+ dayfirst = False ,
229
234
date_parser = None ,
230
235
nrows = None ,
231
236
iterator = False ,
@@ -242,7 +247,8 @@ def read_fwf(filepath_or_buffer,
242
247
colspecs = kwds .get ('colspecs' , None )
243
248
widths = kwds .pop ('widths' , None )
244
249
if bool (colspecs is None ) == bool (widths is None ):
245
- raise ValueError ("You must specify only one of 'widths' and 'colspecs'" )
250
+ raise ValueError ("You must specify only one of 'widths' and "
251
+ "'colspecs'" )
246
252
247
253
# Compute 'colspec' from 'widths', if specified.
248
254
if widths is not None :
@@ -258,8 +264,8 @@ def read_fwf(filepath_or_buffer,
258
264
259
265
def read_clipboard (** kwargs ): # pragma: no cover
260
266
"""
261
- Read text from clipboard and pass to read_table. See read_table for the full
262
- argument list
267
+ Read text from clipboard and pass to read_table. See read_table for the
268
+ full argument list
263
269
264
270
Returns
265
271
-------
@@ -334,9 +340,9 @@ class TextParser(object):
334
340
335
341
def __init__ (self , f , delimiter = None , names = None , header = 0 ,
336
342
index_col = None , na_values = None , parse_dates = False ,
337
- date_parser = None , chunksize = None , skiprows = None ,
338
- skip_footer = 0 , converters = None , verbose = False ,
339
- encoding = None ):
343
+ date_parser = None , dayfirst = False , chunksize = None ,
344
+ skiprows = None , skip_footer = 0 , converters = None ,
345
+ verbose = False , encoding = None ):
340
346
"""
341
347
Workhorse function for processing nested list into DataFrame
342
348
@@ -348,12 +354,14 @@ def __init__(self, f, delimiter=None, names=None, header=0,
348
354
self .names = list (names ) if names is not None else names
349
355
self .header = header
350
356
self .index_col = index_col
351
- self .parse_dates = parse_dates
352
- self .date_parser = date_parser
353
357
self .chunksize = chunksize
354
358
self .passed_names = names is not None
355
359
self .encoding = encoding
356
360
361
+ self .parse_dates = parse_dates
362
+ self .date_parser = date_parser
363
+ self .dayfirst = dayfirst
364
+
357
365
if com .is_integer (skiprows ):
358
366
skiprows = range (skiprows )
359
367
self .skiprows = set () if skiprows is None else set (skiprows )
@@ -382,6 +390,10 @@ def __init__(self, f, delimiter=None, names=None, header=0,
382
390
else :
383
391
self .data = f
384
392
self .columns = self ._infer_columns ()
393
+
394
+ # keep a copy of the full column list; columns get popped off for the index
395
+ self .orig_columns = list (self .columns )
396
+
385
397
self .index_name = self ._get_index_name ()
386
398
self ._first_chunk = True
387
399
@@ -588,17 +600,19 @@ def get_chunk(self, rows=None):
588
600
zipped_content .pop (i )
589
601
590
602
if np .isscalar (self .index_col ):
591
- if self .parse_dates :
592
- index = lib .try_parse_dates (index , parser = self .date_parser )
603
+ if self ._should_parse_dates (0 ):
604
+ index = lib .try_parse_dates (index , parser = self .date_parser ,
605
+ dayfirst = self .dayfirst )
593
606
index , na_count = _convert_types (index , self .na_values )
594
607
index = Index (index , name = self .index_name )
595
608
if self .verbose and na_count :
596
609
print 'Found %d NA values in the index' % na_count
597
610
else :
598
611
arrays = []
599
- for arr in index :
600
- if self .parse_dates :
601
- arr = lib .try_parse_dates (arr , parser = self .date_parser )
612
+ for i , arr in enumerate (index ):
613
+ if self ._should_parse_dates (i ):
614
+ arr = lib .try_parse_dates (arr , parser = self .date_parser ,
615
+ dayfirst = self .dayfirst )
602
616
arr , _ = _convert_types (arr , self .na_values )
603
617
arrays .append (arr )
604
618
index = MultiIndex .from_arrays (arrays , names = self .index_name )
@@ -623,10 +637,30 @@ def get_chunk(self, rows=None):
623
637
col = self .columns [col ]
624
638
data [col ] = lib .map_infer (data [col ], f )
625
639
640
+ if not isinstance (self .parse_dates , bool ):
641
+ for x in self .parse_dates :
642
+ if isinstance (x , int ) and x not in data :
643
+ x = self .orig_columns [x ]
644
+ if x in self .index_col or x in self .index_name :
645
+ continue
646
+ data [x ] = lib .try_parse_dates (data [x ], parser = self .date_parser ,
647
+ dayfirst = self .dayfirst )
648
+
626
649
data = _convert_to_ndarrays (data , self .na_values , self .verbose )
627
650
628
651
return DataFrame (data = data , columns = self .columns , index = index )
629
652
653
+ def _should_parse_dates (self , i ):
654
+ if isinstance (self .parse_dates , bool ):
655
+ return self .parse_dates
656
+ else :
657
+ to_parse = self .parse_dates
658
+ if np .isscalar (self .index_col ):
659
+ name = self .index_name
660
+ else :
661
+ name = self .index_name [i ]
662
+ return i in to_parse or name in to_parse
663
+
630
664
def _get_lines (self , rows = None ):
631
665
source = self .data
632
666
lines = self .buf
@@ -725,7 +759,8 @@ def __init__(self, f, colspecs, filler):
725
759
def next (self ):
726
760
line = self .f .next ()
727
761
# Note: 'colspecs' is a sequence of half-open intervals.
728
- return [line [fromm :to ].strip (self .filler or ' ' ) for (fromm , to ) in self .colspecs ]
762
+ return [line [fromm :to ].strip (self .filler or ' ' )
763
+ for (fromm , to ) in self .colspecs ]
729
764
730
765
731
766
class FixedWidthFieldParser (TextParser ):
@@ -743,7 +778,7 @@ def _make_reader(self, f):
743
778
self .data = FixedWidthReader (f , self .colspecs , self .delimiter )
744
779
745
780
746
- #-------------------------------------------------------------------------------
781
+ #----------------------------------------------------------------------
747
782
# ExcelFile class
748
783
749
784
_openpyxl_msg = ("\n For parsing .xlsx files 'openpyxl' is required.\n "
@@ -795,8 +830,8 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
795
830
skiprows : list-like
796
831
Row numbers to skip (0-indexed)
797
832
index_col : int, default None
798
- Column to use as the row labels of the DataFrame. Pass None if there
799
- is no such column
833
+ Column to use as the row labels of the DataFrame. Pass None if
834
+ there is no such column
800
835
na_values : list-like, default None
801
836
List of additional strings to recognize as NA/NaN
802
837
0 commit comments