From 5ca4980b494f03906fae8e2ee642a4175e0a4b63 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 19 Apr 2022 09:32:27 -0700 Subject: [PATCH 1/2] Make tdata a Localizer attribute --- pandas/_libs/tslibs/vectorized.pyx | 34 ++++++++---------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 5829bf5492ef8..a03a15bc1a7a6 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -59,6 +59,7 @@ cdef class Localizer: Py_ssize_t ntrans const int64_t[::1] deltas int64_t delta + int64_t* tdata @cython.initializedcheck(False) @cython.boundscheck(False) @@ -69,6 +70,7 @@ cdef class Localizer: self.ntrans = -1 # placeholder self.delta = -1 # placeholder self.deltas = _deltas_placeholder + self.tdata = NULL if is_utc(tz) or tz is None: self.use_utc = True @@ -91,6 +93,8 @@ cdef class Localizer: if typ == "pytz": self.use_pytz = True + self.tdata = cnp.PyArray_DATA(self.trans) + # ------------------------------------------------------------------------- @@ -135,7 +139,6 @@ def ints_to_pydatetime( Localizer info = Localizer(tz) int64_t utc_val, local_val Py_ssize_t pos, i, n = stamps.shape[0] - int64_t* tdata = NULL npy_datetimestruct dts tzinfo new_tz @@ -156,9 +159,6 @@ def ints_to_pydatetime( "box must be one of 'datetime', 'date', 'time' or 'timestamp'" ) - if info.use_dst: - tdata = cnp.PyArray_DATA(info.trans) - for i in range(n): utc_val = stamps[i] new_tz = tz @@ -174,7 +174,7 @@ def ints_to_pydatetime( elif info.use_fixed: local_val = utc_val + info.delta else: - pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1 + pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1 local_val = utc_val + info.deltas[pos] if info.use_pytz: @@ -222,14 +222,10 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: Localizer info = Localizer(tz) int64_t utc_val, local_val Py_ssize_t pos, i, n = stamps.shape[0] - int64_t* tdata = NULL npy_datetimestruct dts c_Resolution reso = c_Resolution.RESO_DAY, curr_reso - if info.use_dst: - tdata = cnp.PyArray_DATA(info.trans) - for i in range(n): utc_val = stamps[i] if utc_val == NPY_NAT: @@ -242,7 +238,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: elif info.use_fixed: local_val = utc_val + info.delta else: - pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1 + pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1 local_val = utc_val + info.deltas[pos] dt64_to_dtstruct(local_val, &dts) @@ -278,13 +274,9 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t Localizer info = Localizer(tz) int64_t utc_val, local_val Py_ssize_t pos, i, n = stamps.shape[0] - int64_t* tdata = NULL int64_t[::1] result = np.empty(n, dtype=np.int64) - if info.use_dst: - tdata = cnp.PyArray_DATA(info.trans) - for i in range(n): utc_val = stamps[i] if utc_val == NPY_NAT: @@ -298,7 +290,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t elif info.use_fixed: local_val = utc_val + info.delta else: - pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1 + pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1 local_val = utc_val + info.deltas[pos] result[i] = local_val - (local_val % DAY_NANOS) @@ -327,10 +319,6 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: Localizer info = Localizer(tz) int64_t utc_val, local_val Py_ssize_t pos, i, n = stamps.shape[0] - int64_t* tdata = NULL - - if info.use_dst: - tdata = cnp.PyArray_DATA(info.trans) for i in range(n): utc_val = stamps[i] @@ -341,7 +329,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: elif info.use_fixed: local_val = utc_val + info.delta else: - pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1 + pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1 local_val = utc_val + info.deltas[pos] if local_val % DAY_NANOS != 0: @@ -360,14 +348,10 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): Localizer info = Localizer(tz) int64_t utc_val, local_val Py_ssize_t pos, i, n = stamps.shape[0] - int64_t* tdata = NULL npy_datetimestruct dts int64_t[::1] result = np.empty(n, dtype=np.int64) - if info.use_dst: - tdata = cnp.PyArray_DATA(info.trans) - for i in range(n): utc_val = stamps[i] if utc_val == NPY_NAT: @@ -381,7 +365,7 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): elif info.use_fixed: local_val = utc_val + info.delta else: - pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1 + pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1 local_val = utc_val + info.deltas[pos] dt64_to_dtstruct(local_val, &dts) From a1015ec3b60f684940193736332327e8c37cc721 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 19 Apr 2022 12:24:28 -0700 Subject: [PATCH 2/2] REF: re-use Localizer for tz_convert_from_utc --- asv_bench/benchmarks/tslibs/tz_convert.py | 10 ++- pandas/_libs/tslibs/__init__.py | 2 + pandas/_libs/tslibs/tzconversion.pyi | 5 -- pandas/_libs/tslibs/tzconversion.pyx | 104 +--------------------- pandas/_libs/tslibs/vectorized.pyi | 3 + pandas/_libs/tslibs/vectorized.pyx | 49 ++++++++++ pandas/core/arrays/datetimes.py | 11 ++- pandas/tests/tslibs/test_api.py | 1 + pandas/tests/tslibs/test_conversion.py | 11 +-- pandas/tseries/frequencies.py | 6 +- 10 files changed, 79 insertions(+), 123 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py index 803c2aaa635b0..c6b510efdca69 100644 --- a/asv_bench/benchmarks/tslibs/tz_convert.py +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -11,10 +11,14 @@ try: old_sig = False - from pandas._libs.tslibs.tzconversion import tz_convert_from_utc + from pandas._libs.tslibs import tz_convert_from_utc except ImportError: - old_sig = True - from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc + try: + old_sig = False + from pandas._libs.tslibs.tzconversion import tz_convert_from_utc + except ImportError: + old_sig = True + from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc class TimeTZConvert: diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 15836854bad4d..7cbc1833093ba 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -20,6 +20,7 @@ "get_resolution", "Timestamp", "tz_convert_from_utc_single", + "tz_convert_from_utc", "to_offset", "Tick", "BaseOffset", @@ -64,4 +65,5 @@ ints_to_pydatetime, is_date_array_normalized, normalize_i8_timestamps, + tz_convert_from_utc, ) diff --git a/pandas/_libs/tslibs/tzconversion.pyi b/pandas/_libs/tslibs/tzconversion.pyi index 5e513eefdca15..8647dee712294 100644 --- a/pandas/_libs/tslibs/tzconversion.pyi +++ b/pandas/_libs/tslibs/tzconversion.pyi @@ -8,11 +8,6 @@ import numpy as np from pandas._typing import npt -def tz_convert_from_utc( - vals: npt.NDArray[np.int64], # const int64_t[:] - tz: tzinfo, -) -> npt.NDArray[np.int64]: ... - # py_tz_convert_from_utc_single exposed for testing def py_tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ... def tz_localize_to_utc( diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 806df7928a5a1..8cb57b0b28f3a 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -429,18 +429,7 @@ cdef int64_t localize_tzinfo_api( int64_t utc_val, tzinfo tz, bint* fold=NULL ) except? -1: """ - Parameters - ---------- - utc_val : int64_t - tz : tzinfo - fold : bint* - pointer to fold: whether datetime ends up in a fold or not - after adjustment - - Returns - ------- - delta : int64_t - Value to add when converting from utc. + See _tz_localize_using_tzinfo_api.__doc__ """ return _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False, fold=fold) @@ -514,97 +503,6 @@ cdef int64_t tz_convert_from_utc_single( return utc_val + deltas[0] -def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): - """ - Convert the values (in i8) from UTC to tz - - Parameters - ---------- - vals : int64 ndarray - tz : tzinfo - - Returns - ------- - int64 ndarray of converted - """ - cdef: - const int64_t[:] converted - - if vals.shape[0] == 0: - return np.array([], dtype=np.int64) - - converted = _tz_convert_from_utc(vals, tz) - return np.asarray(converted, dtype=np.int64) - - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz): - """ - Convert the given values (in i8) either to UTC or from UTC. - - Parameters - ---------- - stamps : int64 ndarray - tz : tzinfo - - Returns - ------- - converted : ndarray[int64_t] - """ - cdef: - Py_ssize_t i, ntrans = -1, n = stamps.shape[0] - ndarray[int64_t] trans - int64_t[::1] deltas - int64_t* tdata = NULL - intp_t pos - int64_t utc_val, local_val, delta = NPY_NAT - bint use_utc = False, use_tzlocal = False, use_fixed = False - str typ - - int64_t[::1] result - - if is_utc(tz) or tz is None: - # Much faster than going through the "standard" pattern below - return stamps.copy() - - if is_utc(tz) or tz is None: - use_utc = True - elif is_tzlocal(tz) or is_zoneinfo(tz): - use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - ntrans = trans.shape[0] - if typ not in ["pytz", "dateutil"]: - # static/fixed; in this case we know that len(delta) == 1 - use_fixed = True - delta = deltas[0] - else: - tdata = cnp.PyArray_DATA(trans) - - result = np.empty(n, dtype=np.int64) - - for i in range(n): - utc_val = stamps[i] - if utc_val == NPY_NAT: - result[i] = NPY_NAT - continue - - # The pattern used in vectorized.pyx checks for use_utc here, - # but we handle that case above. - if use_tzlocal: - local_val = utc_val + _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False) - elif use_fixed: - local_val = utc_val + delta - else: - pos = bisect_right_i8(tdata, utc_val, ntrans) - 1 - local_val = utc_val + deltas[pos] - - result[i] = local_val - - return result - - # OSError may be thrown by tzlocal on windows at or close to 1970-01-01 # see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 cdef int64_t _tz_localize_using_tzinfo_api( diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi index c138050c9c17f..a8f81514c5645 100644 --- a/pandas/_libs/tslibs/vectorized.pyi +++ b/pandas/_libs/tslibs/vectorized.pyi @@ -34,3 +34,6 @@ def ints_to_pydatetime( fold: bool = ..., box: str = ..., ) -> npt.NDArray[np.object_]: ... +def tz_convert_from_utc( + stamps: npt.NDArray[np.int64], tz: tzinfo | None +) -> npt.NDArray[np.int64]: ... diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index a03a15bc1a7a6..f98f7eab0c8cc 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -96,6 +96,55 @@ cdef class Localizer: self.tdata = cnp.PyArray_DATA(self.trans) +@cython.boundscheck(False) +@cython.wraparound(False) +def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz): + """ + Convert the values (in i8) from UTC to tz + + Parameters + ---------- + stamps : ndarray[int64] + tz : tzinfo + + Returns + ------- + ndarray[int64] + """ + cdef: + Localizer info = Localizer(tz) + int64_t utc_val, local_val + Py_ssize_t pos, i, n = stamps.shape[0] + + int64_t[::1] result + + if tz is None or is_utc(tz) or stamps.size == 0: + # Much faster than going through the "standard" pattern below + return stamps.base.copy() + + result = np.empty(n, dtype=np.int64) + + for i in range(n): + utc_val = stamps[i] + if utc_val == NPY_NAT: + result[i] = NPY_NAT + continue + + if info.use_utc: + local_val = utc_val + elif info.use_tzlocal: + local_val = utc_val + localize_tzinfo_api(utc_val, tz) + elif info.use_fixed: + local_val = utc_val + info.delta + else: + pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1 + local_val = utc_val + info.deltas[pos] + + result[i] = local_val + + return result.base + + # ------------------------------------------------------------------------- diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 667a7b6ed4ae1..0e51c33da3a4b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -34,6 +34,7 @@ normalize_i8_timestamps, timezones, to_offset, + tz_convert_from_utc, tzconversion, ) from pandas._typing import npt @@ -814,7 +815,7 @@ def _local_timestamps(self) -> np.ndarray: if self.tz is None or timezones.is_utc(self.tz): # Avoid the copy that would be made in tzconversion return self.asi8 - return tzconversion.tz_convert_from_utc(self.asi8, self.tz) + return tz_convert_from_utc(self.asi8, self.tz) def tz_convert(self, tz) -> DatetimeArray: """ @@ -1046,7 +1047,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArr if self.tz is not None: if tz is None: - new_dates = tzconversion.tz_convert_from_utc(self.asi8, self.tz) + new_dates = tz_convert_from_utc(self.asi8, self.tz) else: raise TypeError("Already tz-aware, use tz_convert to convert.") else: @@ -2132,7 +2133,11 @@ def _sequence_to_dt64ns( # by convention, these are _already_ UTC, e.g return data.view(DT64NS_DTYPE), tz, None - utc_vals = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + if timezones.is_utc(tz): + # Fastpath, avoid copy made in tzconversion + utc_vals = data.view("i8") + else: + utc_vals = tz_convert_from_utc(data.view("i8"), tz) data = utc_vals.view(DT64NS_DTYPE) elif inferred_tz: tz = inferred_tz diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 755ac3d144246..9655bb88c2fcf 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -46,6 +46,7 @@ def test_namespace(): "delta_to_nanoseconds", "ints_to_pytimedelta", "localize_pydatetime", + "tz_convert_from_utc", "tz_convert_from_utc_single", "to_offset", "tz_compare", diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index a790b2617783f..99be0e63d58e2 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -9,6 +9,7 @@ conversion, iNaT, timezones, + tz_convert_from_utc, tzconversion, ) @@ -23,7 +24,7 @@ def _compare_utc_to_local(tz_didx): def f(x): return tzconversion.py_tz_convert_from_utc_single(x, tz_didx.tz) - result = tzconversion.tz_convert_from_utc(tz_didx.asi8, tz_didx.tz) + result = tz_convert_from_utc(tz_didx.asi8, tz_didx.tz) expected = np.vectorize(f)(tz_didx.asi8) tm.assert_numpy_array_equal(result, expected) @@ -53,11 +54,11 @@ def _compare_local_to_utc(tz_didx, naive_didx): def test_tz_localize_to_utc_copies(): # GH#46460 arr = np.arange(5, dtype="i8") - result = tzconversion.tz_convert_from_utc(arr, tz=UTC) + result = tz_convert_from_utc(arr, tz=UTC) tm.assert_numpy_array_equal(result, arr) assert not np.shares_memory(arr, result) - result = tzconversion.tz_convert_from_utc(arr, tz=None) + result = tz_convert_from_utc(arr, tz=None) tm.assert_numpy_array_equal(result, arr) assert not np.shares_memory(arr, result) @@ -89,7 +90,7 @@ def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq): ], ) def test_tz_convert_corner(arr): - result = tzconversion.tz_convert_from_utc(arr, timezones.maybe_get_tz("Asia/Tokyo")) + result = tz_convert_from_utc(arr, timezones.maybe_get_tz("Asia/Tokyo")) tm.assert_numpy_array_equal(result, arr) @@ -97,7 +98,7 @@ def test_tz_convert_readonly(): # GH#35530 arr = np.array([0], dtype=np.int64) arr.setflags(write=False) - result = tzconversion.tz_convert_from_utc(arr, UTC) + result = tz_convert_from_utc(arr, UTC) tm.assert_numpy_array_equal(result, arr) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 9ce70ec38870c..bc851447b59e1 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -7,7 +7,7 @@ from pandas._libs.algos import unique_deltas from pandas._libs.tslibs import ( Timestamp, - tzconversion, + tz_convert_from_utc, ) from pandas._libs.tslibs.ccalendar import ( DAYS, @@ -217,9 +217,7 @@ def __init__(self, index, warn: bool = True) -> None: # the timezone so they are in local time if hasattr(index, "tz"): if index.tz is not None: - self.i8values = tzconversion.tz_convert_from_utc( - self.i8values, index.tz - ) + self.i8values = tz_convert_from_utc(self.i8values, index.tz) if warn is not True: warnings.warn(