Skip to content

Commit be7af6b

Browse files
committed
Add more cache conditions
1 parent ef7e0ba commit be7af6b

File tree

3 files changed

+54
-8
lines changed

3 files changed

+54
-8
lines changed

asv_bench/benchmarks/timeseries.py

+8
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,8 @@ def setup(self):
356356

357357
self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
358358
self.s2 = self.s.str.replace(':\\S+$', '')
359+
self.numeric_data = Series([range(100000)])
360+
self.datetime_data = [dt.datetime(2010, 1, 1)] * 100000
359361

360362
def time_format_YYYYMMDD(self):
361363
to_datetime(self.stringsD, format='%Y%m%d')
@@ -381,6 +383,12 @@ def time_format_exact(self):
381383
def time_format_no_exact(self):
382384
to_datetime(self.s, format='%d%b%y', exact=False)
383385

386+
def time_cache_numeric_data(self):
387+
to_datetime(self.numeric_data)
388+
389+
def time_cache_datetime_data(self):
390+
to_datetime(self.datetime_data)
391+
384392

385393
class Offsets(object):
386394
goal_time = 0.2

pandas/core/tools/datetimes.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
is_float,
1717
is_list_like,
1818
is_scalar,
19-
is_numeric_dtype)
19+
is_numeric_dtype,
20+
is_string_dtype)
2021
from pandas.core.dtypes.generic import (
2122
ABCIndexClass, ABCSeries,
2223
ABCDataFrame, ABCDateOffset)
@@ -521,14 +522,19 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
521522
arg = arg + offset
522523

523524
convert_cache = None
524-
if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
525+
if cache and is_list_like(arg):
526+
# Create a cache only if there are at least 10**5 values and the user
527+
# passes in datestrings
528+
min_cache_threshold = 10**5
529+
if len(arg) >= min_cache_threshold and is_string_dtype(arg):
525530
# unique currently cannot determine dates that are out of bounds
526-
# use the cache only if the data is a string and there are more than 10**5 values
527-
unique_dates = algorithms.unique(arg)
528-
if len(unique_dates) != len(arg):
529-
from pandas import Series
530-
cache_data = _convert_listlike(unique_dates, True, format)
531-
convert_cache = Series(cache_data, index=unique_dates)
531+
# recursion errors with datetime
532+
unique_dates = algorithms.unique(arg)
533+
# Only build the cache when every value is identical (one unique date)
534+
if len(unique_dates) == 1:
535+
from pandas import Series
536+
cache_data = _convert_listlike(unique_dates, True, format)
537+
convert_cache = Series(cache_data, index=unique_dates)
532538

533539
if isinstance(arg, tslib.Timestamp):
534540
result = arg

pandas/tests/indexes/datetimes/test_tools.py

+32
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,38 @@ def test_datetime_invalid_datatype(self):
370370
pd.to_datetime(bool)
371371
with pytest.raises(TypeError):
372372
pd.to_datetime(pd.to_datetime)
373+
374+
@pytest.mark.parametrize("utc", [True, None])
375+
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
376+
@pytest.mark.parametrize("box", [True, False])
377+
@pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index])
378+
def test_to_datetime_cache(self, utc, format, box, constructor):
379+
date = '20130101 00:00:00'
380+
test_dates = [date] * 10**5
381+
data = constructor(test_dates)
382+
result = pd.to_datetime(data, utc=utc, format=format, box=box)
383+
expected = pd.to_datetime(data, utc=utc, format=format, box=box,
384+
cache=False)
385+
if box:
386+
tm.assert_index_equal(result, expected)
387+
else:
388+
tm.assert_numpy_array_equal(result, expected)
389+
390+
@pytest.mark.parametrize("utc", [True, None])
391+
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
392+
def test_to_datetime_cache_series(self, utc, format):
393+
date = '20130101 00:00:00'
394+
test_dates = [date] * 10**5
395+
data = pd.Series(test_dates)
396+
result = pd.to_datetime(data, utc=utc, format=format, cache=True)
397+
expected = pd.to_datetime(data, utc=utc, format=format)
398+
tm.assert_series_equal(result, expected)
399+
400+
def test_to_datetime_cache_scalar(self):
401+
date = '20130101 00:00:00'
402+
result = pd.to_datetime(date, cache=True)
403+
expected = pd.Timestamp('20130101 00:00:00')
404+
assert result == expected
373405

374406

375407
class TestToDatetimeUnit(object):

0 commit comments

Comments
 (0)