Skip to content

Commit bc50159

Browse files
jbrockmendelvictor
authored and
victor
committed
Standardize special case in tz_conversion functions (pandas-dev#22181)
1 parent 861b147 commit bc50159

File tree

5 files changed

+55
-57
lines changed

5 files changed

+55
-57
lines changed

pandas/_libs/tslibs/conversion.pyx

+52-51
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz):
612612
# ----------------------------------------------------------------------
613613
# Timezone Conversion
614614

615-
cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz,
615+
cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
616616
bint to_utc=True):
617617
"""
618618
tz_convert for non-UTC non-tzlocal cases where we have to check
@@ -631,33 +631,26 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz,
631631
"""
632632
cdef:
633633
Py_ssize_t n = len(values)
634-
Py_ssize_t i, j, pos
634+
Py_ssize_t i, pos
635635
int64_t[:] result = np.empty(n, dtype=np.int64)
636-
ndarray[int64_t] tt, trans
636+
ndarray[int64_t] trans
637637
int64_t[:] deltas
638-
Py_ssize_t[:] posn
639638
int64_t v
640639

641640
trans, deltas, typ = get_dst_info(tz)
642641
if not to_utc:
643642
# We add `offset` below instead of subtracting it
644643
deltas = -1 * np.array(deltas, dtype='i8')
645644

646-
tt = values[values != NPY_NAT]
647-
if not len(tt):
648-
# if all NaT, return all NaT
649-
return values
650-
651-
posn = trans.searchsorted(tt, side='right')
652-
653-
j = 0
654645
for i in range(n):
655646
v = values[i]
656647
if v == NPY_NAT:
657648
result[i] = v
658649
else:
659-
pos = posn[j] - 1
660-
j += 1
650+
# TODO: Is it more efficient to call searchsorted pointwise or
651+
# on `values` outside the loop? We are not consistent about this.
652+
# relative efficiency of pointwise increases with number of iNaTs
653+
pos = trans.searchsorted(v, side='right') - 1
661654
if pos < 0:
662655
raise ValueError('First time before start of DST info')
663656
result[i] = v - deltas[pos]
@@ -734,7 +727,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
734727
Py_ssize_t pos
735728
int64_t v, offset, utc_date
736729
npy_datetimestruct dts
737-
ndarray[int64_t] arr # TODO: Is there a lighter-weight way to do this?
730+
int64_t arr[1]
738731

739732
# See GH#17734 We should always be converting either from UTC or to UTC
740733
assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC')
@@ -746,7 +739,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
746739
if is_tzlocal(tz1):
747740
utc_date = _tz_convert_tzlocal_utc(val, tz1, to_utc=True)
748741
elif get_timezone(tz1) != 'UTC':
749-
arr = np.array([val])
742+
arr[0] = val
750743
utc_date = _tz_convert_dst(arr, tz1, to_utc=True)[0]
751744
else:
752745
utc_date = val
@@ -757,17 +750,54 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
757750
return _tz_convert_tzlocal_utc(utc_date, tz2, to_utc=False)
758751
else:
759752
# Convert UTC to other timezone
760-
arr = np.array([utc_date])
753+
arr[0] = utc_date
761754
# Note: at least with cython 0.28.3, doing a lookup `[0]` in the next
762755
# line is sensitive to the declared return type of _tz_convert_dst;
763756
# if it is declared as returning ndarray[int64_t], a compile-time error
764757
# is raised.
765758
return _tz_convert_dst(arr, tz2, to_utc=False)[0]
766759

767760

761+
cdef inline int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz,
762+
bint to_utc):
763+
"""
764+
Convert the given values (in i8) either to UTC or from UTC.
765+
766+
Parameters
767+
----------
768+
vals : int64 ndarray
769+
tz : string / timezone object
770+
to_utc : bint
771+
772+
Returns
773+
-------
774+
converted : int64_t[:] memoryview
775+
"""
776+
cdef:
777+
int64_t[:] converted, result
778+
Py_ssize_t i, n = len(vals)
779+
int64_t val
780+
781+
if get_timezone(tz) != 'UTC':
782+
converted = np.empty(n, dtype=np.int64)
783+
if is_tzlocal(tz):
784+
for i in range(n):
785+
val = vals[i]
786+
if val == NPY_NAT:
787+
converted[i] = NPY_NAT
788+
else:
789+
converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc)
790+
else:
791+
converted = _tz_convert_dst(vals, tz, to_utc)
792+
else:
793+
converted = vals
794+
795+
return converted
796+
797+
768798
@cython.boundscheck(False)
769799
@cython.wraparound(False)
770-
def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
800+
def tz_convert(int64_t[:] vals, object tz1, object tz2):
771801
"""
772802
Convert the values (in i8) from timezone1 to timezone2
773803
@@ -781,45 +811,16 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
781811
-------
782812
int64 ndarray of converted
783813
"""
784-
785814
cdef:
786-
ndarray[int64_t] utc_dates, result
787-
Py_ssize_t i, j, pos, n = len(vals)
788-
int64_t v
815+
int64_t[:] utc_dates, converted
789816

790817
if len(vals) == 0:
791818
return np.array([], dtype=np.int64)
792819

793820
# Convert to UTC
794-
if get_timezone(tz1) != 'UTC':
795-
utc_dates = np.empty(n, dtype=np.int64)
796-
if is_tzlocal(tz1):
797-
for i in range(n):
798-
v = vals[i]
799-
if v == NPY_NAT:
800-
utc_dates[i] = NPY_NAT
801-
else:
802-
utc_dates[i] = _tz_convert_tzlocal_utc(v, tz1, to_utc=True)
803-
else:
804-
utc_dates = np.array(_tz_convert_dst(vals, tz1, to_utc=True))
805-
else:
806-
utc_dates = vals
807-
808-
if get_timezone(tz2) == 'UTC':
809-
return utc_dates
810-
811-
elif is_tzlocal(tz2):
812-
result = np.zeros(n, dtype=np.int64)
813-
for i in range(n):
814-
v = utc_dates[i]
815-
if v == NPY_NAT:
816-
result[i] = NPY_NAT
817-
else:
818-
result[i] = _tz_convert_tzlocal_utc(v, tz2, to_utc=False)
819-
return result
820-
else:
821-
# Convert UTC to other timezone
822-
return np.array(_tz_convert_dst(utc_dates, tz2, to_utc=False))
821+
utc_dates = _tz_convert_one_way(vals, tz1, to_utc=True)
822+
converted = _tz_convert_one_way(utc_dates, tz2, to_utc=False)
823+
return np.array(converted, dtype=np.int64)
823824

824825

825826
# TODO: cdef scalar version to call from convert_str_to_tsobject

pandas/_libs/tslibs/offsets.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ cimport cython
55
from cython cimport Py_ssize_t
66

77
import time
8-
from cpython.datetime cimport (PyDateTime_IMPORT, PyDateTime_CheckExact,
8+
from cpython.datetime cimport (PyDateTime_IMPORT,
99
datetime, timedelta,
1010
time as dt_time)
1111
PyDateTime_IMPORT

pandas/_libs/tslibs/parsing.pyx

-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ Parsing functions for datetime and datetime-like strings.
66
import sys
77
import re
88

9-
cimport cython
109
from cython cimport Py_ssize_t
1110

1211

pandas/_libs/tslibs/resolution.pyx

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# -*- coding: utf-8 -*-
22
# cython: profile=False
33

4-
cimport cython
54
from cython cimport Py_ssize_t
65

76
import numpy as np
@@ -11,8 +10,7 @@ from util cimport is_string_object, get_nat
1110

1211
from np_datetime cimport npy_datetimestruct, dt64_to_dtstruct
1312
from frequencies cimport get_freq_code
14-
from timezones cimport (is_utc, is_tzlocal,
15-
maybe_get_tz, get_dst_info)
13+
from timezones cimport is_utc, is_tzlocal, maybe_get_tz, get_dst_info
1614
from conversion cimport tz_convert_utc_to_tzlocal
1715
from ccalendar cimport get_days_in_month
1816

pandas/_libs/tslibs/strptime.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ except:
2323
import pytz
2424

2525
from cython cimport Py_ssize_t
26-
from cpython cimport PyFloat_Check
2726

2827
import numpy as np
2928
from numpy cimport int64_t
@@ -622,6 +621,7 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,
622621
days_to_week = week_0_length + (7 * (week_of_year - 1))
623622
return 1 + days_to_week + day_of_week
624623

624+
625625
cdef parse_timezone_directive(object z):
626626
"""
627627
Parse the '%z' directive and return a pytz.FixedOffset

0 commit comments

Comments
 (0)