Skip to content

Commit 2a1a064

Browse files
committed
Some performance testing
1 parent 7a35c4b commit 2a1a064

File tree

3 files changed

+45
-19
lines changed

3 files changed

+45
-19
lines changed

asv_bench/benchmarks/timeseries.py

+35-9
Original file line numberDiff line numberDiff line change
@@ -356,9 +356,17 @@ def setup(self):
356356

357357
self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
358358
self.s2 = self.s.str.replace(':\\S+$', '')
359-
self.dup_numeric_data = Series([1000] * 100000)
360-
self.dup_string_data = ['2013-01-01'] * 100000
361-
self.dup_datetime_data = [dt.datetime(2010, 1, 1)] * 100000
359+
self.dup_numeric_data_10_5 = Series([1000] * 100000)
360+
self.dup_string_data_10_5 = ['2013-01-01 01:00:00'] * 100000
361+
self.dup_datetime_data_10_5 = [dt.datetime(2010, 1, 1)] * 100000
362+
363+
self.dup_numeric_data_10_3 = Series([1000] * 100)
364+
self.dup_string_data_10_3 = ['2013-01-01 01:00:00'] * 100
365+
self.dup_datetime_data_10_3 = [dt.datetime(2010, 1, 1)] * 100
366+
367+
self.dup_numeric_data_10_7 = Series([1000] * 10**7)
368+
self.dup_string_data_10_7 = ['2013-01-01 01:00:00'] * 10**7
369+
self.dup_datetime_data_10_7 = [dt.datetime(2010, 1, 1)] * 10**7
362370

363371
def time_format_YYYYMMDD(self):
364372
to_datetime(self.stringsD, format='%Y%m%d')
@@ -384,14 +392,32 @@ def time_format_exact(self):
384392
def time_format_no_exact(self):
385393
to_datetime(self.s, format='%d%b%y', exact=False)
386394

387-
def time_cache_dup_numeric_data(self):
388-
to_datetime(self.dup_numeric_data, unit='s')
395+
def time_cache_dup_numeric_data_10_3(self):
396+
to_datetime(self.dup_numeric_data_10_3, unit='s')
397+
398+
def time_cache_dup_datetime_data_10_3(self):
399+
to_datetime(self.dup_datetime_data_10_3)
400+
401+
def time_cache_dup_string_data_10_3(self):
402+
to_datetime(self.dup_string_data_10_3)
403+
404+
def time_cache_dup_numeric_data_10_5(self):
405+
to_datetime(self.dup_numeric_data_10_5, unit='s')
406+
407+
def time_cache_dup_datetime_data_10_5(self):
408+
to_datetime(self.dup_datetime_data_10_5)
409+
410+
def time_cache_dup_string_data_10_5(self):
411+
to_datetime(self.dup_string_data_10_5)
412+
413+
def time_cache_dup_numeric_data_10_7(self):
414+
to_datetime(self.dup_numeric_data_10_7, unit='s')
389415

390-
def time_cache_dup_datetime_data(self):
391-
to_datetime(self.dup_datetime_data)
416+
def time_cache_dup_datetime_data_10_7(self):
417+
to_datetime(self.dup_datetime_data_10_7)
392418

393-
def time_cache_dup_string_data(self):
394-
to_datetime(self.dup_string_data)
419+
def time_cache_dup_string_data_10_7(self):
420+
to_datetime(self.dup_string_data_10_7)
395421

396422

397423
class Offsets(object):

pandas/core/indexes/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ def __new__(cls, data=None,
334334
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
335335
is_integer_dtype(data)):
336336
data = tools.to_datetime(data, dayfirst=dayfirst,
337-
yearfirst=yearfirst)
337+
yearfirst=yearfirst, cache=False)
338338

339339
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
340340

pandas/core/tools/datetimes.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
355355

356356
def _convert_listlike(arg, box, format, name=None, tz=tz):
357357

358-
358+
import pdb; pdb.set_trace()
359359
if isinstance(arg, (list, tuple)):
360360
arg = np.array(arg, dtype='O')
361361

@@ -525,16 +525,16 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
525525
if cache and is_list_like(arg):
526526
# Create a cache only if there are at least 10**5 values and the user
527527
# passes in datestrings
528-
min_cache_threshold = 10**5
529-
if len(arg) >= min_cache_threshold and is_string_dtype(arg):
528+
#min_cache_threshold = 10**5
529+
#if len(arg) >= min_cache_threshold and is_string_dtype(arg):
530530
# unique currently cannot determine dates that are out of bounds
531531
# recursion errors with datetime
532-
unique_dates = algorithms.unique(arg)
533-
# Essentially they need to all be the same value
534-
if len(unique_dates) == 1:
535-
from pandas import Series
536-
cache_data = _convert_listlike(unique_dates, True, format)
537-
convert_cache = Series(cache_data, index=unique_dates)
532+
unique_dates = algorithms.unique(arg)
533+
# Only build the cache when the input contains duplicate values
534+
if len(unique_dates) != len(arg):
535+
from pandas import Series
536+
cache_data = _convert_listlike(unique_dates, False, format)
537+
convert_cache = Series(cache_data, index=unique_dates)
538538

539539
if isinstance(arg, tslib.Timestamp):
540540
result = arg

0 commit comments

Comments
 (0)