|
1 | 1 | from datetime import datetime, timedelta, time
|
2 | 2 | import numpy as np
|
| 3 | +import pandas as pd |
3 | 4 | from collections import MutableMapping
|
4 | 5 |
|
5 | 6 | from pandas._libs import lib, tslib
|
@@ -183,7 +184,8 @@ def _guess_datetime_format_for_array(arr, **kwargs):
|
183 | 184 |
|
184 | 185 | def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
|
185 | 186 | utc=None, box=True, format=None, exact=True,
|
186 |
| - unit=None, infer_datetime_format=False, origin='unix'): |
| 187 | + unit=None, infer_datetime_format=False, origin='unix', |
| 188 | + cache_datetime=False): |
187 | 189 | """
|
188 | 190 | Convert argument to datetime.
|
189 | 191 |
|
@@ -257,6 +259,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
|
257 | 259 |
|
258 | 260 | .. versionadded:: 0.20.0
|
259 | 261 |
|
| 262 | + cache_datetime : boolean, default False |
| 263 | + If True, use a cache of unique, converted dates to apply the datetime |
| 264 | + conversion. Produces significant speed-ups when parsing duplicate dates. |
| 265 | +
|
260 | 266 | Returns
|
261 | 267 | -------
|
262 | 268 | ret : datetime if parsing succeeded.
|
@@ -340,6 +346,19 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
|
340 | 346 |
|
341 | 347 | tz = 'utc' if utc else None
|
342 | 348 |
|
| 349 | + cache = None |
| 350 | + if (cache_datetime and is_list_like(arg) and |
| 351 | + not isinstance(arg, DatetimeIndex)): |
| 352 | + # No need to convert with a cache if the arg is already a DatetimeIndex |
| 353 | + unique_dates = pd.unique(arg) |
| 354 | + if len(unique_dates) != len(arg): |
| 355 | + cache = {d: pd.to_datetime(d, errors=errors, dayfirst=dayfirst, |
| 356 | + yearfirst=yearfirst, utc=utc, box=box, format=format, |
| 357 | + exact=exact, unit=unit, |
| 358 | + infer_datetime_format=infer_datetime_format, |
| 359 | + origin=origin, cache_datetime=False) |
| 360 | + for d in unique_dates} |
| 361 | + |
343 | 362 | def _convert_listlike(arg, box, format, name=None, tz=tz):
|
344 | 363 |
|
345 | 364 | if isinstance(arg, (list, tuple)):
|
@@ -505,15 +524,27 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
|
505 | 524 | if isinstance(arg, tslib.Timestamp):
|
506 | 525 | result = arg
|
507 | 526 | elif isinstance(arg, ABCSeries):
|
508 |
| - from pandas import Series |
509 |
| - values = _convert_listlike(arg._values, False, format) |
510 |
| - result = Series(values, index=arg.index, name=arg.name) |
| 527 | + if cache: |
| 528 | + result = arg.map(cache) |
| 529 | + else: |
| 530 | + values = _convert_listlike(arg._values, False, format) |
| 531 | + result = pd.Series(values, index=arg.index, name=arg.name) |
511 | 532 | elif isinstance(arg, (ABCDataFrame, MutableMapping)):
|
512 | 533 | result = _assemble_from_unit_mappings(arg, errors=errors)
|
513 | 534 | elif isinstance(arg, ABCIndexClass):
|
514 |
| - result = _convert_listlike(arg, box, format, name=arg.name) |
| 535 | + if cache: |
| 536 | + result = pd.Series(arg.values).map(cache).values |
| 537 | + if box: |
| 538 | + result = DatetimeIndex(result, tz=tz, name=arg.name) |
| 539 | + else: |
| 540 | + result = _convert_listlike(arg, box, format, name=arg.name) |
515 | 541 | elif is_list_like(arg):
|
516 |
| - result = _convert_listlike(arg, box, format) |
| 542 | + if cache: |
| 543 | + result = pd.Series(arg).map(cache).values |
| 544 | + if box: |
| 545 | + result = DatetimeIndex(result, tz=tz) |
| 546 | + else: |
| 547 | + result = _convert_listlike(arg, box, format) |
517 | 548 | else:
|
518 | 549 | result = _convert_listlike(np.array([arg]), box, format)[0]
|
519 | 550 |
|
|
0 commit comments