327
327
values. The options are `None` for the ordinary converter,
328
328
`high` for the high-precision converter, and `round_trip` for the
329
329
round-trip converter.
330
+ cache_dates : boolean, default False
331
+ If True, use a cache of unique, converted dates to apply the datetime
332
+ conversion. May produce significant speed-up when parsing duplicate
333
+ date strings, especially ones with timezone offsets.
334
+
335
+ .. versionadded:: 0.25.0
330
336
331
337
Returns
332
338
-------
@@ -476,6 +482,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
476
482
'false_values' : None ,
477
483
'converters' : None ,
478
484
'dtype' : None ,
485
+ 'cache_dates' : False ,
479
486
480
487
'thousands' : None ,
481
488
'comment' : None ,
@@ -577,6 +584,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
577
584
keep_date_col = False ,
578
585
date_parser = None ,
579
586
dayfirst = False ,
587
+ cache_dates = False ,
580
588
581
589
# Iteration
582
590
iterator = False ,
@@ -683,6 +691,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
683
691
keep_date_col = keep_date_col ,
684
692
dayfirst = dayfirst ,
685
693
date_parser = date_parser ,
694
+ cache_dates = cache_dates ,
686
695
687
696
nrows = nrows ,
688
697
iterator = iterator ,
@@ -1385,11 +1394,13 @@ def __init__(self, kwds):
1385
1394
self .tupleize_cols = kwds .get ('tupleize_cols' , False )
1386
1395
self .mangle_dupe_cols = kwds .get ('mangle_dupe_cols' , True )
1387
1396
self .infer_datetime_format = kwds .pop ('infer_datetime_format' , False )
1397
+ self .cache_dates = kwds .pop ('cache_dates' , False )
1388
1398
1389
1399
self ._date_conv = _make_date_converter (
1390
1400
date_parser = self .date_parser ,
1391
1401
dayfirst = self .dayfirst ,
1392
- infer_datetime_format = self .infer_datetime_format
1402
+ infer_datetime_format = self .infer_datetime_format ,
1403
+ cache_dates = self .cache_dates
1393
1404
)
1394
1405
1395
1406
# validate header options for mi
@@ -3179,7 +3190,7 @@ def _get_lines(self, rows=None):
3179
3190
3180
3191
3181
3192
def _make_date_converter (date_parser = None , dayfirst = False ,
3182
- infer_datetime_format = False ):
3193
+ infer_datetime_format = False , cache_dates = False ):
3183
3194
def converter (* date_cols ):
3184
3195
if date_parser is None :
3185
3196
strs = _concat_date_cols (date_cols )
@@ -3190,16 +3201,22 @@ def converter(*date_cols):
3190
3201
utc = None ,
3191
3202
dayfirst = dayfirst ,
3192
3203
errors = 'ignore' ,
3193
- infer_datetime_format = infer_datetime_format
3204
+ infer_datetime_format = infer_datetime_format ,
3205
+ cache = cache_dates
3194
3206
).to_numpy ()
3195
3207
3196
3208
except ValueError :
3197
3209
return tools .to_datetime (
3198
- parsing .try_parse_dates (strs , dayfirst = dayfirst ))
3210
+ parsing .try_parse_dates (strs , dayfirst = dayfirst ),
3211
+ cache = cache_dates
3212
+ )
3199
3213
else :
3200
3214
try :
3201
3215
result = tools .to_datetime (
3202
- date_parser (* date_cols ), errors = 'ignore' )
3216
+ date_parser (* date_cols ),
3217
+ errors = 'ignore' ,
3218
+ cache = cache_dates
3219
+ )
3203
3220
if isinstance (result , datetime .datetime ):
3204
3221
raise Exception ('scalar parser' )
3205
3222
return result
@@ -3209,6 +3226,7 @@ def converter(*date_cols):
3209
3226
parsing .try_parse_dates (_concat_date_cols (date_cols ),
3210
3227
parser = date_parser ,
3211
3228
dayfirst = dayfirst ),
3229
+ cache = cache_dates ,
3212
3230
errors = 'ignore' )
3213
3231
except Exception :
3214
3232
return generic_parser (date_parser , * date_cols )
0 commit comments