Skip to content

Commit ef7e0ba

Browse files
committed
Move cache down the stack, explore threshold to trigger cache
1 parent b542e53 commit ef7e0ba

File tree

1 file changed

+31
-15
lines changed

1 file changed

+31
-15
lines changed

pandas/core/tools/datetimes.py

+31-15
Original file line number | Diff line number | Diff line change
@@ -354,16 +354,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
354354

355355
def _convert_listlike(arg, box, format, name=None, tz=tz):
356356

357-
datetime_cache = None
358-
if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
359-
unique_dates = algorithms.unique(arg)
360-
if len(unique_dates) != len(arg):
361-
datetime_cache = Series(pd.to_datetime(unique_dates,
362-
errors=errors, dayfirst=dayfirst,
363-
yearfirst=yearfirst, utc=utc, box=box, format=format,
364-
exact=exact, unit=unit,
365-
infer_datetime_format=infer_datetime_format,
366-
origin=origin, cache=False), index=unique_dates)
357+
367358
if isinstance(arg, (list, tuple)):
368359
arg = np.array(arg, dtype='O')
369360

@@ -529,18 +520,43 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
529520
arg = np.asarray(arg)
530521
arg = arg + offset
531522

523+
convert_cache = None
524+
if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
525+
# unique currently cannot determine dates that are out of bounds
526+
# TODO: restrict the cache to string data with more than 10**5 values; this threshold is not applied yet, so the cache is currently built whenever the input contains duplicates
527+
unique_dates = algorithms.unique(arg)
528+
if len(unique_dates) != len(arg):
529+
from pandas import Series
530+
cache_data = _convert_listlike(unique_dates, True, format)
531+
convert_cache = Series(cache_data, index=unique_dates)
532+
532533
if isinstance(arg, tslib.Timestamp):
533534
result = arg
534535
elif isinstance(arg, ABCSeries):
535-
from pandas import Series
536-
values = _convert_listlike(arg._values, True, format)
537-
result = Series(values, index=arg.index, name=arg.name)
536+
if convert_cache is not None:
537+
result = arg.map(convert_cache)
538+
else:
539+
from pandas import Series
540+
values = _convert_listlike(arg._values, True, format)
541+
result = Series(values, index=arg.index, name=arg.name)
538542
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
539543
result = _assemble_from_unit_mappings(arg, errors=errors)
540544
elif isinstance(arg, ABCIndexClass):
541-
result = _convert_listlike(arg, box, format, name=arg.name)
545+
if convert_cache is not None:
546+
from pandas import Series
547+
result = Series(arg).map(convert_cache).values
548+
if box:
549+
result = DatetimeIndex(result, tz=tz, name=arg.name)
550+
else:
551+
result = _convert_listlike(arg, box, format, name=arg.name)
542552
elif is_list_like(arg):
543-
result = _convert_listlike(arg, box, format)
553+
if convert_cache is not None:
554+
from pandas import Series
555+
result = Series(arg).map(convert_cache).values
556+
if box:
557+
result = DatetimeIndex(result, tz=tz)
558+
else:
559+
result = _convert_listlike(arg, box, format)
544560
else:
545561
result = _convert_listlike(np.array([arg]), box, format)[0]
546562

0 commit comments

Comments
 (0)