Skip to content

Commit 5f9fb96

Browse files
committed
PERF: leverage tzlocal package to provide 2000x speedup for dateutil.tz.tzlocal operations
1 parent fdc4db2 commit 5f9fb96

21 files changed

+138
-15
lines changed

asv_bench/asv.conf.json

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"xlrd": [],
5252
"xlwt": [],
5353
"pytest": [],
54+
"tzlocal": [],
5455
// If using Windows with python 2.7 and want to build using the
5556
// mingw toolchain (rather than MSVC), uncomment the following line.
5657
// "libpython": [],

ci/deps/azure-27-compat.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ dependencies:
1010
- numpy=1.12.0
1111
- openpyxl=2.5.5
1212
- pytables=3.4.2
13-
- python-dateutil=2.5.0
13+
- python-dateutil=2.6.0
1414
- python=2.7*
1515
- pytz=2013b
1616
- scipy=0.18.1
17+
- tzlocal=1.5.1
1718
- xlrd=1.0.0
1819
- xlsxwriter=0.5.2
1920
- xlwt=0.7.5

ci/deps/azure-27-locale.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dependencies:
1616
- pytz=2013b
1717
- scipy
1818
- sqlalchemy=0.8.1
19+
- tzlocal=1.5.1
1920
- xlrd=1.0.0
2021
- xlsxwriter=0.5.2
2122
- xlwt=0.7.5

ci/deps/azure-macos-35.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies:
1717
- pytables
1818
- python=3.5*
1919
- pytz
20+
- tzlocal
2021
- xarray
2122
- xlrd
2223
- xlsxwriter
@@ -25,5 +26,5 @@ dependencies:
2526
- pytest
2627
- pytest-xdist
2728
- pip:
28-
- python-dateutil==2.5.3
29+
- python-dateutil==2.6.0
2930
- hypothesis>=3.58.0

ci/deps/azure-windows-27.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies:
2020
- s3fs
2121
- scipy
2222
- sqlalchemy
23+
- tzlocal
2324
- xlrd
2425
- xlsxwriter
2526
- xlwt

ci/deps/azure-windows-36.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ dependencies:
1818
- python=3.6.6
1919
- pytz
2020
- scipy
21+
- tzlocal
2122
- xlrd
2223
- xlsxwriter
2324
- xlwt

ci/deps/travis-27.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ dependencies:
2828
- pytables
2929
- blosc=1.14.3
3030
- python-blosc
31-
- python-dateutil=2.5.0
31+
- python-dateutil=2.6.0
3232
- python=2.7*
3333
- pytz=2013b
3434
- s3fs
3535
- scipy
3636
- sqlalchemy=0.9.6
37+
- tzlocal
3738
- xarray=0.9.6
3839
- xlrd=1.0.0
3940
- xlsxwriter=0.5.2

ci/deps/travis-36-doc.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ dependencies:
3636
- sphinx
3737
- sqlalchemy
3838
- statsmodels
39+
- tzlocal
3940
- xarray
4041
- xlrd
4142
- xlsxwriter

doc/source/install.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ Recommended Dependencies
241241
evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. If installed,
242242
must be Version 1.2.0 or higher.
243243

244+
* `tzlocal <https://github.com/regebro/tzlocal>`__: significantly improves performance when using the ``dateutil.tz.tzlocal()`` timezone.
245+
244246
.. note::
245247

246248
You are highly encouraged to install these libraries, as they provide speed improvements, especially
@@ -328,7 +330,6 @@ Optional Dependencies
328330
to get the necessary dependencies for installation of `lxml`_. This
329331
will prevent further headaches down the line.
330332

331-
332333
.. _html5lib: https://github.com/html5lib/html5lib-python
333334
.. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup
334335
.. _lxml: http://lxml.de

environment.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66
# required
77
- numpy>=1.15
88
- python=3
9-
- python-dateutil>=2.5.0
9+
- python-dateutil>=2.6.0
1010
- pytz
1111

1212
# development
@@ -48,6 +48,7 @@ dependencies:
4848
- seaborn
4949
- sqlalchemy
5050
- statsmodels
51+
- tzlocal
5152
- xarray
5253
- xlrd
5354
- xlsxwriter

pandas/_libs/tslib.pyx

+4-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
2828
from pandas._libs.tslibs.parsing import parse_datetime_string
2929

3030
from pandas._libs.tslibs.timedeltas cimport cast_from_unit
31-
from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info
31+
from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info, get_tzlocal_tz
3232
from pandas._libs.tslibs.timezones import UTC
3333
from pandas._libs.tslibs.conversion cimport (
3434
tz_convert_single, _TSObject, convert_datetime_to_tsobject,
@@ -124,6 +124,9 @@ def ints_to_pydatetime(int64_t[:] arr, object tz=None, object freq=None,
124124
raise ValueError("box must be one of 'datetime', 'date', 'time' or"
125125
" 'timestamp'")
126126

127+
if is_tzlocal(tz):
128+
tz = get_tzlocal_tz(tz)
129+
127130
if is_utc(tz) or tz is None:
128131
for i in range(n):
129132
value = arr[i]

pandas/_libs/tslibs/conversion.pyx

+41-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ from pandas._libs.tslibs.timedeltas cimport (cast_from_unit,
3333
delta_to_nanoseconds)
3434
from pandas._libs.tslibs.timezones cimport (
3535
is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info,
36-
get_timezone, maybe_get_tz, tz_compare)
36+
get_timezone, maybe_get_tz, tz_compare, get_tzlocal_tz)
3737
from pandas._libs.tslibs.timezones import UTC
3838
from pandas._libs.tslibs.parsing import parse_datetime_string
3939

@@ -541,9 +541,19 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
541541
int64_t local_val
542542
Py_ssize_t pos
543543
str typ
544+
bint tz_changed
544545

545546
assert obj.tzinfo is None
546547

548+
orig_tz = tz
549+
tz_changed = False
550+
551+
if is_tzlocal(tz):
552+
new_tz = get_tzlocal_tz(tz)
553+
if new_tz != tz:
554+
tz_changed = True
555+
tz = new_tz
556+
547557
if is_utc(tz):
548558
pass
549559
elif obj.value == NPY_NAT:
@@ -574,7 +584,12 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
574584
# so this branch will never be reached.
575585
pass
576586

577-
obj.tzinfo = tz
587+
if tz_changed:
588+
# We want to return tzlocal() if provided it, even if we map it
589+
# to a real tz for performance reasons
590+
obj.tzinfo = orig_tz
591+
else:
592+
obj.tzinfo = tz
578593

579594

580595
cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
@@ -652,6 +667,9 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
652667
bint tz_is_local
653668

654669
tz_is_local = is_tzlocal(tz)
670+
if tz_is_local:
671+
tz = get_tzlocal_tz(tz)
672+
tz_is_local = is_tzlocal(tz)
655673

656674
if not tz_is_local:
657675
# get_dst_info cannot extract offsets from tzlocal because its
@@ -763,6 +781,11 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
763781
if val == NPY_NAT:
764782
return val
765783

784+
if is_tzlocal(tz1):
785+
tz1 = get_tzlocal_tz(tz1)
786+
if is_tzlocal(tz2):
787+
tz2 = get_tzlocal_tz(tz2)
788+
766789
# Convert to UTC
767790
if is_tzlocal(tz1):
768791
utc_date = _tz_convert_tzlocal_utc(val, tz1, to_utc=True)
@@ -807,10 +830,16 @@ cdef inline int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz,
807830
int64_t[:] converted, result
808831
Py_ssize_t i, n = len(vals)
809832
int64_t val
833+
bint tz_is_local
834+
835+
tz_is_local = is_tzlocal(tz)
836+
if tz_is_local:
837+
tz = get_tzlocal_tz(tz)
838+
tz_is_local = is_tzlocal(tz)
810839

811840
if not is_utc(get_timezone(tz)):
812841
converted = np.empty(n, dtype=np.int64)
813-
if is_tzlocal(tz):
842+
if tz_is_local:
814843
for i in range(n):
815844
val = vals[i]
816845
if val == NPY_NAT:
@@ -915,6 +944,9 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
915944

916945
result = np.empty(n, dtype=np.int64)
917946

947+
if is_tzlocal(tz):
948+
tz = get_tzlocal_tz(tz)
949+
918950
if is_tzlocal(tz):
919951
for i in range(n):
920952
v = vals[i]
@@ -1222,6 +1254,9 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, tzinfo tz):
12221254
npy_datetimestruct dts
12231255
int64_t delta, local_val
12241256

1257+
if is_tzlocal(tz):
1258+
tz = get_tzlocal_tz(tz)
1259+
12251260
if is_tzlocal(tz):
12261261
for i in range(n):
12271262
if stamps[i] == NPY_NAT:
@@ -1301,6 +1336,9 @@ def is_date_array_normalized(int64_t[:] stamps, object tz=None):
13011336
int64_t local_val, delta
13021337
str typ
13031338

1339+
if is_tzlocal(tz):
1340+
tz = get_tzlocal_tz(tz)
1341+
13041342
if tz is None or is_utc(tz):
13051343
for i in range(n):
13061344
dt64_to_dtstruct(stamps[i], &dts)

pandas/_libs/tslibs/period.pyx

+4-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ cimport pandas._libs.tslibs.util as util
3232
from pandas._libs.tslibs.util cimport is_period_object, is_string_object
3333

3434
from pandas._libs.tslibs.timestamps import Timestamp
35-
from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info
35+
from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info, get_tzlocal_tz
3636
from pandas._libs.tslibs.timedeltas import Timedelta
3737
from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds
3838

@@ -1505,6 +1505,9 @@ cdef int64_t[:] localize_dt64arr_to_period(int64_t[:] stamps,
15051505
npy_datetimestruct dts
15061506
int64_t local_val
15071507

1508+
if is_tzlocal(tz):
1509+
tz = get_tzlocal_tz(tz)
1510+
15081511
if is_utc(tz) or tz is None:
15091512
with nogil:
15101513
for i in range(n):

pandas/_libs/tslibs/resolution.pyx

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ from pandas._libs.tslibs.np_datetime cimport (
99
npy_datetimestruct, dt64_to_dtstruct)
1010
from pandas._libs.tslibs.frequencies cimport get_freq_code
1111
from pandas._libs.tslibs.timezones cimport (
12-
is_utc, is_tzlocal, maybe_get_tz, get_dst_info)
12+
is_utc, is_tzlocal, maybe_get_tz, get_dst_info, get_tzlocal_tz)
1313
from pandas._libs.tslibs.conversion cimport tz_convert_utc_to_tzlocal
1414
from pandas._libs.tslibs.ccalendar cimport get_days_in_month
1515

@@ -49,6 +49,9 @@ cdef _reso_local(int64_t[:] stamps, object tz):
4949
npy_datetimestruct dts
5050
int64_t local_val, delta
5151

52+
if is_tzlocal(tz):
53+
tz = get_tzlocal_tz(tz)
54+
5255
if is_utc(tz) or tz is None:
5356
for i in range(n):
5457
if stamps[i] == NPY_NAT:

pandas/_libs/tslibs/timezones.pxd

+4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
cpdef bint is_utc(object tz)
44
cdef bint is_tzlocal(object tz)
55

6+
cpdef bint have_tzlocal_package()
7+
cpdef object get_tzlocal_tz(object tz)
8+
cpdef _set_tzlocal_tz(object tz)
9+
610
cdef bint treat_tz_as_pytz(object tz)
711
cdef bint treat_tz_as_dateutil(object tz)
812

pandas/_libs/tslibs/timezones.pyx

+34
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
1212
import pytz
1313
UTC = pytz.utc
1414

15+
tzlocal_package = None
16+
tzlocal_tz = None
1517

1618
import numpy as np
1719
cimport numpy as cnp
@@ -34,6 +36,38 @@ cdef inline bint is_tzlocal(object tz):
3436
return isinstance(tz, _dateutil_tzlocal)
3537

3638

39+
cpdef bint have_tzlocal_package():
    """
    Report whether the optional ``tzlocal`` package can be imported.

    The import is attempted at most once.  On success the module object is
    stored in the module-level ``tzlocal_package``; on failure ``False`` is
    stored there instead, so later calls do not retry the import.

    Returns
    -------
    bint
        True if ``tzlocal`` was imported successfully, False otherwise.
    """
    global tzlocal_package

    # None means "not tried yet"; False means "tried and not installed".
    if tzlocal_package is None:
        try:
            import tzlocal
        except ImportError:
            # Remember the failure: this function is called on every
            # tz-local operation, and a failed import is not cached by the
            # interpreter, so retrying it each time would be costly.
            tzlocal_package = False
        else:
            tzlocal_package = tzlocal

    return tzlocal_package is not None and tzlocal_package is not False
49+
50+
51+
cpdef object get_tzlocal_tz(object tz):
    """
    Map a ``dateutil.tz.tzlocal()`` object to the equivalent pytz timezone.

    The local zone name is looked up through the ``tzlocal`` package the
    first time it is needed and cached in the module-level ``tzlocal_tz``.

    Parameters
    ----------
    tz : object
        The timezone to fall back to; returned unchanged whenever no pytz
        equivalent can be determined.

    Returns
    -------
    object
        ``pytz.timezone(<local zone name>)`` when the ``tzlocal`` package is
        available and the zone name is known to pytz, otherwise ``tz``.
    """
    global tzlocal_tz

    if not have_tzlocal_package():
        return tz

    try:
        if tzlocal_tz is None:
            # The lookup lives inside the ``try`` so that a failure to
            # determine the system zone falls back to ``tz`` instead of
            # propagating out of every tz-local operation.
            # NOTE(review): assumes get_localzone() signals an unresolvable
            # zone with pytz's UnknownTimeZoneError -- confirm against the
            # supported tzlocal versions.
            local_tz = tzlocal_package.get_localzone()
            zone_name = getattr(local_tz, 'zone', None)
            if zone_name is None:
                # A tzinfo without a ``zone`` name cannot be mapped to pytz.
                return tz
            tzlocal_tz = zone_name

        return pytz.timezone(tzlocal_tz)
    except pytz.exceptions.UnknownTimeZoneError:
        # Drop the cached name so a later call looks the zone up again.
        tzlocal_tz = None

    return tz
64+
65+
66+
cpdef _set_tzlocal_tz(object tz_str):
    """
    Override the cached local timezone name used by ``get_tzlocal_tz``.

    Intended for tests.

    Parameters
    ----------
    tz_str : object
        The zone name to cache; converted with ``str``.  Passing ``None``
        clears the cache rather than storing the literal string ``'None'``.
    """
    global tzlocal_tz
    tzlocal_tz = None if tz_str is None else str(tz_str)
69+
70+
3771
cdef inline bint treat_tz_as_pytz(object tz):
3872
return (hasattr(tz, '_utc_transition_times') and
3973
hasattr(tz, '_transition_info'))

pandas/tests/indexes/datetimes/test_timezones.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,8 @@ def test_timetz_accessor(self, tz_naive_fixture):
820820

821821
index = DatetimeIndex(['2018-06-04 10:20:30', pd.NaT], tz=tz)
822822
result = index.timetz
823-
823+
print('result', result)
824+
print('expected', expected)
824825
tm.assert_numpy_array_equal(result, expected)
825826

826827
def test_dti_drop_dont_lose_tz(self):

pandas/tests/series/test_datetime_values.py

+2
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,8 @@ def test_dt_timetz_accessor(self, tz_naive_fixture):
536536
expected = Series([time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz),
537537
time(22, 14, tzinfo=tz)])
538538
result = s.dt.timetz
539+
print('result', result)
540+
print('expected', expected)
539541
tm.assert_series_equal(result, expected)
540542

541543
def test_setitem_with_string_index(self):

pandas/tests/tslibs/test_timezones.py

+24
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,30 @@ def test_tzlocal_offset():
4949
assert ts.value + offset == Timestamp("2011-01-01").value
5050

5151

52+
def test_tzlocal_package():
    """
    Check the tzlocal-backed fast path for ``dateutil.tz.tzlocal()``.

    Covers the fallback when the cached zone name is invalid, and checks
    that every pytz zone name gives the same UTC instant as an explicit
    pytz timezone while ``Timestamp.tz`` still reports ``tzlocal()``.
    """
    if not timezones.have_tzlocal_package():
        pytest.skip("tzlocal package is not installed, can't test behavior")

    orig_tzlocal_tz = timezones.get_tzlocal_tz(None)
    if orig_tzlocal_tz is None:
        # get_tzlocal_tz returned its fallback argument, so there is no
        # zone name to restore once the test has overridden the cache.
        pytest.skip("local timezone could not be mapped to a pytz timezone")

    try:
        # Check that we fall back to non-tzlocal code path if user has an
        # invalid system tz set
        timezones._set_tzlocal_tz('foo/bar')
        ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal())
        assert ts.tz == dateutil.tz.tzlocal()

        utc = pytz.utc
        for tz in pytz.all_timezones:
            # Check that all pytz timezones work
            ts_tz = Timestamp("2011-01-01", tz=tz)
            timezones._set_tzlocal_tz(tz)
            ts_local = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal())

            assert ts_tz.astimezone(utc) == ts_local.astimezone(utc)
            assert ts_local.tz == dateutil.tz.tzlocal()
    finally:
        # Restore the module-level cache even when an assertion fails, so
        # an overridden zone cannot leak into other tests.
        timezones._set_tzlocal_tz(orig_tzlocal_tz.zone)
74+
75+
5276
@pytest.fixture(params=[
5377
(pytz.timezone("US/Eastern"), lambda tz, x: tz.localize(x)),
5478
(dateutil.tz.gettz("US/Eastern"), lambda tz, x: x.replace(tzinfo=tz))

0 commit comments

Comments
 (0)