327
327
values. The options are `None` for the ordinary converter,
328
328
`high` for the high-precision converter, and `round_trip` for the
329
329
round-trip converter.
330
+ cache_dates : boolean, default False
331
+ If True, use a cache of unique, converted dates to apply the datetime
332
+ conversion. May produce significant speed-up when parsing duplicate
333
+ date strings, especially ones with timezone offsets.
334
+
335
+ .. versionadded:: 0.25.0
330
336
331
337
Returns
332
338
-------
@@ -476,6 +482,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
476
482
'false_values' : None ,
477
483
'converters' : None ,
478
484
'dtype' : None ,
485
+ 'cache_dates' : False ,
479
486
480
487
'thousands' : None ,
481
488
'comment' : None ,
@@ -577,6 +584,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
577
584
keep_date_col = False ,
578
585
date_parser = None ,
579
586
dayfirst = False ,
587
+ cache_dates = False ,
580
588
581
589
# Iteration
582
590
iterator = False ,
@@ -683,6 +691,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
683
691
keep_date_col = keep_date_col ,
684
692
dayfirst = dayfirst ,
685
693
date_parser = date_parser ,
694
+ cache_dates = cache_dates ,
686
695
687
696
nrows = nrows ,
688
697
iterator = iterator ,
@@ -1379,11 +1388,13 @@ def __init__(self, kwds):
1379
1388
self .tupleize_cols = kwds .get ('tupleize_cols' , False )
1380
1389
self .mangle_dupe_cols = kwds .get ('mangle_dupe_cols' , True )
1381
1390
self .infer_datetime_format = kwds .pop ('infer_datetime_format' , False )
1391
+ self .cache_dates = kwds .pop ('cache_dates' , False )
1382
1392
1383
1393
self ._date_conv = _make_date_converter (
1384
1394
date_parser = self .date_parser ,
1385
1395
dayfirst = self .dayfirst ,
1386
- infer_datetime_format = self .infer_datetime_format
1396
+ infer_datetime_format = self .infer_datetime_format ,
1397
+ cache_dates = self .cache_dates
1387
1398
)
1388
1399
1389
1400
# validate header options for mi
@@ -3173,7 +3184,7 @@ def _get_lines(self, rows=None):
3173
3184
3174
3185
3175
3186
def _make_date_converter (date_parser = None , dayfirst = False ,
3176
- infer_datetime_format = False ):
3187
+ infer_datetime_format = False , cache_dates = False ):
3177
3188
def converter (* date_cols ):
3178
3189
if date_parser is None :
3179
3190
strs = _concat_date_cols (date_cols )
@@ -3184,16 +3195,22 @@ def converter(*date_cols):
3184
3195
utc = None ,
3185
3196
dayfirst = dayfirst ,
3186
3197
errors = 'ignore' ,
3187
- infer_datetime_format = infer_datetime_format
3198
+ infer_datetime_format = infer_datetime_format ,
3199
+ cache = cache_dates
3188
3200
).to_numpy ()
3189
3201
3190
3202
except ValueError :
3191
3203
return tools .to_datetime (
3192
- parsing .try_parse_dates (strs , dayfirst = dayfirst ))
3204
+ parsing .try_parse_dates (strs , dayfirst = dayfirst ),
3205
+ cache = cache_dates
3206
+ )
3193
3207
else :
3194
3208
try :
3195
3209
result = tools .to_datetime (
3196
- date_parser (* date_cols ), errors = 'ignore' )
3210
+ date_parser (* date_cols ),
3211
+ errors = 'ignore' ,
3212
+ cache = cache_dates
3213
+ )
3197
3214
if isinstance (result , datetime .datetime ):
3198
3215
raise Exception ('scalar parser' )
3199
3216
return result
@@ -3203,6 +3220,7 @@ def converter(*date_cols):
3203
3220
parsing .try_parse_dates (_concat_date_cols (date_cols ),
3204
3221
parser = date_parser ,
3205
3222
dayfirst = dayfirst ),
3223
+ cache = cache_dates ,
3206
3224
errors = 'ignore' )
3207
3225
except Exception :
3208
3226
return generic_parser (date_parser , * date_cols )
0 commit comments