@@ -36,9 +36,77 @@ def _guess_datetime_format_for_array(arr, **kwargs):
36
36
return _guess_datetime_format (arr [non_nan_elements [0 ]], ** kwargs )
37
37
38
38
39
def _maybe_cache(arg, format, cache, tz, convert_listlike):
    """
    Create a cache of unique dates from an array of dates

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
    format : string
        Strftime format to parse time
    cache : boolean
        True attempts to create a cache of converted values
    tz : string
        Timezone of the dates
    convert_listlike : function
        Conversion function to apply on dates. Called as
        ``convert_listlike(unique_dates, True, format, tz=tz)``.

    Returns
    -------
    cache_array : Series
        Cache of converted, unique dates, indexed by the original
        (unconverted) unique values. Empty when ``cache`` is False or when
        ``arg`` holds no duplicates, in which case callers fall back to the
        regular conversion path.
    """
    from pandas import Index, Series

    # Give the empty cache an explicit dtype: a bare ``Series()`` has a
    # version-dependent default dtype and warns on some pandas releases.
    cache_array = Series(dtype=object)
    if not cache:
        return cache_array

    # Perform a quicker unique check: without duplicates there is nothing
    # to gain from a cache, so hand back the empty one.
    if Index(arg).is_unique:
        return cache_array

    unique_dates = algorithms.unique(arg)
    cache_dates = convert_listlike(unique_dates, True, format, tz=tz)
    cache_array = Series(cache_dates, index=unique_dates)

    # Building the index can coerce distinct missing-value markers into the
    # same label; a cache with duplicated labels cannot be used for a
    # ``map`` lookup, so keep only the first occurrence of each label.
    if not cache_array.index.is_unique:
        cache_array = cache_array[~cache_array.index.duplicated()]
    return cache_array
70
+
71
+
72
def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
    """
    Convert array of dates with a cache and box the result

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
    cache_array : Series
        Cache of converted, unique dates
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    errors : string
        'ignore' plus box=True will convert result to Index
    name : string, default None
        Name for the boxed result (Index or DatetimeIndex)

    Returns
    -------
    result : datetime of converted dates
        Returns:

        - Index-like if box=True
        - ndarray if box=False
    """
    from pandas import Series, DatetimeIndex, Index

    # Look every element up in the cache instead of parsing it again.
    result = Series(arg).map(cache_array)
    if not box:
        return result.values
    if errors == 'ignore':
        # A generic Index is used here (presumably because 'ignore' may
        # leave values unconverted -- confirm against the conversion
        # routine). ``name`` is passed through so that both boxed paths
        # label the result consistently.
        return Index(result, name=name)
    return DatetimeIndex(result, name=name)
104
+
105
+
39
106
def to_datetime (arg , errors = 'raise' , dayfirst = False , yearfirst = False ,
40
107
utc = None , box = True , format = None , exact = True ,
41
- unit = None , infer_datetime_format = False , origin = 'unix' ):
108
+ unit = None , infer_datetime_format = False , origin = 'unix' ,
109
+ cache = False ):
42
110
"""
43
111
Convert argument to datetime.
44
112
@@ -111,7 +179,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
111
179
origin.
112
180
113
181
.. versionadded:: 0.20.0
182
+ cache : boolean, default False
183
+ If True, use a cache of unique, converted dates to apply the datetime
184
+ conversion. May produce significant speed-up when parsing duplicate date
185
+ strings, especially ones with timezone offsets.
114
186
187
+ .. versionadded:: 0.22.0
115
188
Returns
116
189
-------
117
190
ret : datetime if parsing succeeded.
@@ -369,15 +442,28 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
369
442
if isinstance (arg , tslib .Timestamp ):
370
443
result = arg
371
444
elif isinstance (arg , ABCSeries ):
372
- from pandas import Series
373
- values = _convert_listlike (arg ._values , True , format )
374
- result = Series (values , index = arg .index , name = arg .name )
445
+ cache_array = _maybe_cache (arg , format , cache , tz , _convert_listlike )
446
+ if not cache_array .empty :
447
+ result = arg .map (cache_array )
448
+ else :
449
+ from pandas import Series
450
+ values = _convert_listlike (arg ._values , True , format )
451
+ result = Series (values , index = arg .index , name = arg .name )
375
452
elif isinstance (arg , (ABCDataFrame , MutableMapping )):
376
453
result = _assemble_from_unit_mappings (arg , errors = errors )
377
454
elif isinstance (arg , ABCIndexClass ):
378
- result = _convert_listlike (arg , box , format , name = arg .name )
455
+ cache_array = _maybe_cache (arg , format , cache , tz , _convert_listlike )
456
+ if not cache_array .empty :
457
+ result = _convert_and_box_cache (arg , cache_array , box , errors ,
458
+ name = arg .name )
459
+ else :
460
+ result = _convert_listlike (arg , box , format , name = arg .name )
379
461
elif is_list_like (arg ):
380
- result = _convert_listlike (arg , box , format )
462
+ cache_array = _maybe_cache (arg , format , cache , tz , _convert_listlike )
463
+ if not cache_array .empty :
464
+ result = _convert_and_box_cache (arg , cache_array , box , errors )
465
+ else :
466
+ result = _convert_listlike (arg , box , format )
381
467
else :
382
468
result = _convert_listlike (np .array ([arg ]), box , format )[0 ]
383
469
0 commit comments