@@ -24,6 +24,9 @@ def next(x):
24
24
25
25
from pandas .util .decorators import Appender
26
26
27
class DateConversionError(Exception):
    """
    Error type for a failed date conversion of combined columns.

    ``_try_convert_dates`` catches this exception and falls back to the
    plain space-joined string column.
    """
29
+
27
30
_parser_params = """Also supports optionally iterating or breaking of the file
28
31
into chunks.
29
32
@@ -51,6 +54,9 @@ def next(x):
51
54
date_parser : function
52
55
Function to use for converting dates to strings. Defaults to
53
56
dateutil.parser
57
+ date_conversion : list or dict, default None
58
+ Can combine multiple columns in date-time specification
59
+ Newly created columns are prepended to the output
54
60
dayfirst : boolean, default False
55
61
DD/MM format dates, international and European format
56
62
thousands : str, default None
@@ -186,6 +192,7 @@ def read_csv(filepath_or_buffer,
186
192
parse_dates = False ,
187
193
dayfirst = False ,
188
194
date_parser = None ,
195
+ date_conversion = None ,
189
196
nrows = None ,
190
197
iterator = False ,
191
198
chunksize = None ,
@@ -216,6 +223,7 @@ def read_table(filepath_or_buffer,
216
223
parse_dates = False ,
217
224
dayfirst = False ,
218
225
date_parser = None ,
226
+ date_conversion = None ,
219
227
nrows = None ,
220
228
iterator = False ,
221
229
chunksize = None ,
@@ -250,6 +258,7 @@ def read_fwf(filepath_or_buffer,
250
258
parse_dates = False ,
251
259
dayfirst = False ,
252
260
date_parser = None ,
261
+ date_conversion = None ,
253
262
nrows = None ,
254
263
iterator = False ,
255
264
chunksize = None ,
@@ -351,6 +360,7 @@ class TextParser(object):
351
360
Comment out remainder of line
352
361
parse_dates : boolean, default False
353
362
date_parser : function, default None
363
+ date_conversion : list or dict, default None
354
364
skiprows : list of integers
355
365
Row numbers to skip
356
366
skip_footer : int
@@ -362,8 +372,8 @@ class TextParser(object):
362
372
def __init__ (self , f , delimiter = None , names = None , header = 0 ,
363
373
index_col = None , na_values = None , thousands = None ,
364
374
comment = None , parse_dates = False ,
365
- date_parser = None , dayfirst = False , chunksize = None ,
366
- skiprows = None , skip_footer = 0 , converters = None ,
375
+ date_parser = None , date_conversion = None , dayfirst = False ,
376
+ chunksize = None , skiprows = None , skip_footer = 0 , converters = None ,
367
377
verbose = False , encoding = None ):
368
378
"""
369
379
Workhorse function for processing nested list into DataFrame
@@ -382,6 +392,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
382
392
383
393
self .parse_dates = parse_dates
384
394
self .date_parser = date_parser
395
+ self .date_conversion = date_conversion
385
396
self .dayfirst = dayfirst
386
397
387
398
if com .is_integer (skiprows ):
@@ -745,9 +756,11 @@ def get_chunk(self, rows=None):
745
756
data [x ] = lib .try_parse_dates (data [x ], parser = self .date_parser ,
746
757
dayfirst = self .dayfirst )
747
758
759
+ data , columns = self ._process_date_conversion (data , self .columns )
760
+
748
761
data = _convert_to_ndarrays (data , self .na_values , self .verbose )
749
762
750
- return DataFrame (data = data , columns = self . columns , index = index )
763
+ return DataFrame (data = data , columns = columns , index = index )
751
764
752
765
def _find_line_number (self , exp_len , chunk_len , chunk_i ):
753
766
if exp_len is None :
@@ -778,6 +791,52 @@ def _should_parse_dates(self, i):
778
791
name = self .index_name [i ]
779
792
return i in to_parse or name in to_parse
780
793
794
def _process_date_conversion(self, data_dict, columns):
    """
    Build the combined date columns requested by ``self.date_conversion``.

    Parameters
    ----------
    data_dict : dict
        Mapping of column name to column values. Updated in place with
        the newly created columns.
    columns : list
        Existing column names.

    Returns
    -------
    data_dict : dict
        The dict that was passed in, now also holding the new columns.
    new_cols : list
        Names of the new columns followed by the original ``columns``.
        When ``self.date_conversion`` is None, ``columns`` itself is
        returned untouched.

    Raises
    ------
    ValueError
        If the name of a new column is already a key of ``data_dict``.
    """
    if self.date_conversion is None:
        return data_dict, columns

    new_cols = []
    new_data = {}

    def date_converter(*date_cols):
        if self.date_parser is None:
            return lib.try_parse_dates(_concat_date_cols(date_cols),
                                       dayfirst=self.dayfirst)
        try:
            # first hand the user parser all columns in a single call
            return self.date_parser(date_cols)
        except Exception:
            # Deliberately not a bare ``except:`` so KeyboardInterrupt and
            # SystemExit still propagate. Fall back to parsing the
            # space-joined strings with the user parser.
            return lib.try_parse_dates(_concat_date_cols(date_cols),
                                       parser=self.date_parser,
                                       dayfirst=self.dayfirst)

    if isinstance(self.date_conversion, list):
        # list of column lists; the new name is derived from the columns
        for colspec in self.date_conversion:
            new_name, col = _try_convert_dates(date_converter, colspec,
                                               data_dict, columns)
            if new_name in data_dict:
                raise ValueError('Result date column already in dict %s' %
                                 new_name)
            new_data[new_name] = col
            new_cols.append(new_name)

    elif isinstance(self.date_conversion, dict):
        # dict of new name to column list
        # ``items()`` instead of ``iteritems()`` so this runs on both
        # Python 2 and Python 3
        for new_name, colspec in self.date_conversion.items():
            if new_name in data_dict:
                raise ValueError('Date column %s already in dict' %
                                 new_name)

            _, col = _try_convert_dates(date_converter, colspec, data_dict,
                                        columns)
            new_data[new_name] = col
            new_cols.append(new_name)

    # new columns are added to the data and listed ahead of the old ones
    data_dict.update(new_data)
    new_cols.extend(columns)
    return data_dict, new_cols
839
+
781
840
def _get_lines (self , rows = None ):
782
841
source = self .data
783
842
lines = self .buf
@@ -860,6 +919,31 @@ def _convert_types(values, na_values):
860
919
861
920
return result , na_count
862
921
922
def _get_col_names(colspec, columns):
    """
    Translate a column specification into a list of column-name strings.

    Parameters
    ----------
    colspec : iterable
        Entries are either column labels or integer positions.
    columns : list
        Available column labels.

    Returns
    -------
    list of str
        ``str()`` of each resolved label. An entry that is a known label is
        used as is (this takes precedence), an ``int`` that is not a label
        is treated as a position into ``columns``, and anything else is
        left out.
    """
    known = set(columns)
    resolved = []
    for spec in colspec:
        if spec in known:
            label = spec
        elif isinstance(spec, int):
            label = columns[spec]
        else:
            # neither a label nor a position: dropped
            continue
        resolved.append(str(label))
    return resolved
931
+
932
def _try_convert_dates(parser, colspec, data_dict, columns):
    """
    Run ``parser`` on the columns selected by ``colspec``.

    Parameters
    ----------
    parser : callable
        Called as ``parser(*cols)`` with the selected column values.
    colspec : iterable
        Column labels and/or integer positions into ``columns``. A value
        that is a label takes precedence over its use as a position;
        entries that are neither are ignored.
    data_dict : dict
        Mapping of column label to column values.
    columns : list
        Available column labels.

    Returns
    -------
    new_name : str
        The selected labels, stringified and joined with ``'_'``.
    new_col : object
        Whatever ``parser`` returned, or the space-joined string column
        when ``parser`` raised ``DateConversionError``.
    """
    # Resolve to the original labels rather than going through
    # ``_get_col_names``: that helper returns stringified names, which do
    # not match the keys of ``data_dict`` when labels are not strings
    # (e.g. integer labels), so such columns were silently skipped.
    known = set(columns)
    keys = []
    for c in colspec:
        if c in known:
            keys.append(c)
        elif isinstance(c, int):
            keys.append(columns[c])

    # only the generated name needs strings
    new_name = '_'.join([str(k) for k in keys])

    to_parse = [data_dict[k] for k in keys if k in data_dict]
    try:
        new_col = parser(*to_parse)
    except DateConversionError:
        new_col = _concat_date_cols(to_parse)
    return new_name, new_col
942
+
943
def _concat_date_cols(date_cols):
    """
    Join several string columns row by row, separated by a single space.

    Parameters
    ----------
    date_cols : sequence of 1-D sequences of str
        The columns to combine; all must have the same length.

    Returns
    -------
    numpy.ndarray
        1-D object array whose i-th entry is the i-th entries of all
        columns joined with ``' '``.

    Raises
    ------
    ValueError
        If no columns are given or the columns differ in length.
    """
    if len(set(len(col) for col in date_cols)) != 1:
        raise ValueError('need at least one date column, all of equal '
                         'length')

    # Joined in plain Python instead of np.apply_along_axis: that function
    # sizes its output dtype from the first result, so every later string
    # longer than the first row's was silently truncated.
    joined = [' '.join(parts) for parts in zip(*date_cols)]
    return np.array(joined, dtype=object)
863
947
864
948
class FixedWidthReader (object ):
865
949
"""
0 commit comments