Skip to content

Commit 55ee9a0

Browse files
mroeschke authored and Pingviinituutti committed
PERF: Use is_utc check to improve performance of dateutil UTC in DatetimeIndex methods (pandas-dev#23772)
1 parent a626343 commit 55ee9a0

File tree

6 files changed

+53
-19
lines changed

6 files changed

+53
-19
lines changed

asv_bench/benchmarks/timeseries.py

+31-7
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from datetime import timedelta
22

3+
import dateutil
34
import numpy as np
45
from pandas import to_datetime, date_range, Series, DataFrame, period_range
56
from pandas.tseries.frequencies import infer_freq
@@ -57,7 +58,10 @@ def time_to_pydatetime(self, index_type):
5758

5859
class TzLocalize(object):
5960

60-
def setup(self):
61+
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
62+
# asv expects param_names to be a list with one name per parameter.
param_names = ['tz']
63+
64+
def setup(self, tz):
6165
dst_rng = date_range(start='10/29/2000 1:00:00',
6266
end='10/29/2000 1:59:59', freq='S')
6367
self.index = date_range(start='10/29/2000',
@@ -68,8 +72,8 @@ def setup(self):
6872
end='10/29/2000 3:00:00',
6973
freq='S'))
7074

71-
def time_infer_dst(self):
72-
self.index.tz_localize('US/Eastern', ambiguous='infer')
75+
def time_infer_dst(self, tz):
76+
self.index.tz_localize(tz, ambiguous='infer')
7377

7478

7579
class ResetIndex(object):
@@ -377,15 +381,35 @@ def time_dup_string_tzoffset_dates(self, cache):
377381

378382
class DatetimeAccessor(object):
379383

380-
def setup(self):
384+
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
385+
# asv expects param_names to be a list with one name per parameter.
param_names = ['tz']
386+
387+
def setup(self, tz):
381388
N = 100000
382-
self.series = Series(date_range(start='1/1/2000', periods=N, freq='T'))
389+
self.series = Series(
390+
date_range(start='1/1/2000', periods=N, freq='T', tz=tz)
391+
)
383392

384-
def time_dt_accessor(self):
393+
def time_dt_accessor(self, tz):
385394
self.series.dt
386395

387-
def time_dt_accessor_normalize(self):
396+
def time_dt_accessor_normalize(self, tz):
388397
self.series.dt.normalize()
389398

399+
def time_dt_accessor_month_name(self, tz):
400+
self.series.dt.month_name()
401+
402+
def time_dt_accessor_day_name(self, tz):
403+
self.series.dt.day_name()
404+
405+
def time_dt_accessor_time(self, tz):
406+
self.series.dt.time
407+
408+
def time_dt_accessor_date(self, tz):
409+
self.series.dt.date
410+
411+
def time_dt_accessor_year(self, tz):
412+
self.series.dt.year
413+
390414

391415
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/timestamp.py

+13-2
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
from pandas import Timestamp
44
import pytz
5+
import dateutil
56

67

78
class TimestampConstruction(object):
@@ -29,7 +30,8 @@ def time_fromtimestamp(self):
2930

3031

3132
class TimestampProperties(object):
32-
_tzs = [None, pytz.timezone('Europe/Amsterdam')]
33+
_tzs = [None, pytz.timezone('Europe/Amsterdam'), pytz.UTC,
34+
dateutil.tz.tzutc()]
3335
_freqs = [None, 'B']
3436
params = [_tzs, _freqs]
3537
param_names = ['tz', 'freq']
@@ -87,7 +89,8 @@ def time_microsecond(self, tz, freq):
8789

8890

8991
class TimestampOps(object):
90-
params = [None, 'US/Eastern', 'UTC']
92+
params = [None, 'US/Eastern', pytz.UTC,
93+
dateutil.tz.tzutc()]
9194
param_names = ['tz']
9295

9396
def setup(self, tz):
@@ -105,6 +108,14 @@ def time_to_pydatetime(self, tz):
105108
def time_normalize(self, tz):
106109
self.ts.normalize()
107110

111+
def time_tz_convert(self, tz):
112+
if self.ts.tz is not None:
113+
self.ts.tz_convert(tz)
114+
115+
def time_tz_localize(self, tz):
116+
if self.ts.tz is None:
117+
self.ts.tz_localize(tz)
118+
108119

109120
class TimestampAcrossDst(object):
110121
def setup(self):

doc/source/whatsnew/v0.24.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1143,6 +1143,7 @@ Performance Improvements
11431143
- Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`)
11441144
- Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`)
11451145
- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`)
1146+
- Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`)
11461147

11471148

11481149
.. _whatsnew_0240.docs:

pandas/_libs/tslibs/conversion.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -882,7 +882,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
882882
bint shift = False, fill_nonexist = False
883883

884884
# Vectorized version of DstTzInfo.localize
885-
if tz == UTC or tz is None:
885+
if is_utc(tz) or tz is None:
886886
return vals
887887

888888
result = np.empty(n, dtype=np.int64)

pandas/core/arrays/datetimes.py

+6-7
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,8 @@ def _to_m8(key, tz=None):
5858
def _field_accessor(name, field, docstring=None):
5959
def f(self):
6060
values = self.asi8
61-
if self.tz is not None:
62-
if self.tz is not utc:
63-
values = self._local_timestamps()
61+
if self.tz is not None and not timezones.is_utc(self.tz):
62+
values = self._local_timestamps()
6463

6564
if field in self._bool_ops:
6665
if field.endswith(('start', 'end')):
@@ -978,7 +977,7 @@ def month_name(self, locale=None):
978977
>>> idx.month_name()
979978
Index(['January', 'February', 'March'], dtype='object')
980979
"""
981-
if self.tz is not None and self.tz is not utc:
980+
if self.tz is not None and not timezones.is_utc(self.tz):
982981
values = self._local_timestamps()
983982
else:
984983
values = self.asi8
@@ -1014,7 +1013,7 @@ def day_name(self, locale=None):
10141013
>>> idx.day_name()
10151014
Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
10161015
"""
1017-
if self.tz is not None and self.tz is not utc:
1016+
if self.tz is not None and not timezones.is_utc(self.tz):
10181017
values = self._local_timestamps()
10191018
else:
10201019
values = self.asi8
@@ -1032,7 +1031,7 @@ def time(self):
10321031
# If the Timestamps have a timezone that is not UTC,
10331032
# convert them into their i8 representation while
10341033
# keeping their timezone and not using UTC
1035-
if self.tz is not None and self.tz is not utc:
1034+
if self.tz is not None and not timezones.is_utc(self.tz):
10361035
timestamps = self._local_timestamps()
10371036
else:
10381037
timestamps = self.asi8
@@ -1056,7 +1055,7 @@ def date(self):
10561055
# If the Timestamps have a timezone that is not UTC,
10571056
# convert them into their i8 representation while
10581057
# keeping their timezone and not using UTC
1059-
if self.tz is not None and self.tz is not utc:
1058+
if self.tz is not None and not timezones.is_utc(self.tz):
10601059
timestamps = self._local_timestamps()
10611060
else:
10621061
timestamps = self.asi8

pandas/core/indexes/datetimes.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
import warnings
77

88
import numpy as np
9-
from pytz import utc
109

1110
from pandas._libs import (
1211
Timestamp, index as libindex, join as libjoin, lib, tslib as libts)
@@ -474,7 +473,7 @@ def astype(self, dtype, copy=True):
474473

475474
def _get_time_micros(self):
476475
values = self.asi8
477-
if self.tz is not None and self.tz is not utc:
476+
if self.tz is not None and not timezones.is_utc(self.tz):
478477
values = self._local_timestamps()
479478
return fields.get_time_micros(values)
480479

0 commit comments

Comments
 (0)