From 62827ef237e846509e299ce6b43c715bde015d54 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 11 Nov 2018 16:59:36 -0800 Subject: [PATCH 01/11] PERF: Timestamp.normalize --- asv_bench/benchmarks/timestamp.py | 3 +++ pandas/_libs/tslibs/timestamps.pyx | 3 +++ pandas/tests/scalar/timestamp/test_unary_ops.py | 11 +++++++++++ 3 files changed, 17 insertions(+) diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index 8eaf815eaa962..ef7d486bf689c 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -102,6 +102,9 @@ def time_replace_None(self, tz): def time_to_pydatetime(self, tz): self.ts.to_pydatetime() + def time_normalize(self, tz): + self.ts.normalize() + class TimestampAcrossDst(object): def setup(self): diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 457f5003cb9a5..992340e16490c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -40,6 +40,7 @@ from timezones cimport ( # Constants _zero_time = datetime_time(0, 0) _no_input = object() +cdef int64_t DAY_NS = 86400000000000 # ---------------------------------------------------------------------- @@ -1285,6 +1286,8 @@ class Timestamp(_Timestamp): Normalize Timestamp to midnight, preserving tz information. """ + if self.tz is None: + return Timestamp(self.value - (self.value % DAY_NS)) normalized_value = normalize_i8_timestamps( np.array([self.value], dtype='i8'), tz=self.tz)[0] return Timestamp(normalized_value).tz_localize(self.tz) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index a9a60c4119605..21404bf7ef76f 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -328,6 +328,17 @@ def test_replace_dst_border(self): expected = Timestamp('2013-11-3 03:00:00', tz='America/Chicago') assert result == expected + # -------------------------------------------------------------- + # Timestamp.normalize + + @pytest.mark.parametrize('arg', ['2013-11-30', '2013-11-30 12:00:00']) + def test_normalize(self, tz_naive_fixture, arg): + tz = tz_naive_fixture + ts = Timestamp(arg, tz=tz) + result = ts.normalize() + expected = Timestamp('2013-11-30', tz=tz) + assert result == expected + # -------------------------------------------------------------- @td.skip_if_windows From ff171f75932861615fa7f20cdfea3ea772c43159 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 11 Nov 2018 19:12:00 -0800 Subject: [PATCH 02/11] PERF DatetimeIndex.normalize --- pandas/core/arrays/datetimes.py | 9 ++++++++- pandas/tests/indexes/datetimes/test_scalar_compat.py | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b0485cc82f07f..5ee776412b64f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -832,7 +832,14 @@ def normalize(self): '2014-08-01 00:00:00+05:30'], dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ - new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) + if self.tz is None: + not_null = self.notnull() + DAY_NS = 86400000000000 + new_values = self.asi8.copy() + adjustment = (new_values[not_null] % DAY_NS) + new_values[not_null] = new_values[not_null] - adjustment + else: + new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) return type(self)(new_values, freq='infer').tz_localize(self.tz) def to_period(self, freq=None): diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 81f4c77009ce4..eb4169639482f 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -235,6 +235,12 @@ def test_normalize(self): assert result.is_normalized assert not rng.is_normalized + def test_normalize_nat(self): + dti = DatetimeIndex([pd.NaT, Timestamp('2018-01-01 01:00:00')]) + result = dti.normalize() + expected = DatetimeIndex([pd.NaT, Timestamp('2018-01-01')]) + tm.assert_index_equal(result, expected) + class TestDateTimeIndexToJulianDate(object): From efb281faebe91d5c63585f21584c3cfcd13543ca Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 11 Nov 2018 20:25:06 -0800 Subject: [PATCH 03/11] remove unused cython code --- pandas/_libs/tslibs/conversion.pyx | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f88671b41a16a..bae07f7557a65 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1116,9 +1116,9 @@ def normalize_date(dt: object) -> datetime: @cython.boundscheck(False) def normalize_i8_timestamps(int64_t[:] stamps, object tz=None): """ - Normalize each of the (nanosecond) timestamps in the given array by - rounding down to the beginning of the day (i.e. midnight). If `tz` - is not None, then this is midnight for this timezone. + Normalize each of the (nanosecond) timezone aware timestamps in the given + array by rounding down to the beginning of the day (i.e. midnight). + This is midnight for timezone, `tz`. Parameters ---------- @@ -1134,17 +1134,8 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz=None): npy_datetimestruct dts int64_t[:] result = np.empty(n, dtype=np.int64) - if tz is not None: - tz = maybe_get_tz(tz) - result = _normalize_local(stamps, tz) - else: - with nogil: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - dt64_to_dtstruct(stamps[i], &dts) - result[i] = _normalized_stamp(&dts) + tz = maybe_get_tz(tz) + result = _normalize_local(stamps, tz) return result.base # .base to access underlying np.ndarray From cc6eee0ee9684121921bdb6516fde3daa6c6c43e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 11 Nov 2018 20:26:50 -0800 Subject: [PATCH 04/11] remove more unused cython --- pandas/_libs/tslibs/conversion.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index bae07f7557a65..fbc738a101d4c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1130,8 +1130,7 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz=None): result : int64 ndarray of converted of normalized nanosecond timestamps """ cdef: - Py_ssize_t i, n = len(stamps) - npy_datetimestruct dts + Py_ssize_t n = len(stamps) int64_t[:] result = np.empty(n, dtype=np.int64) tz = maybe_get_tz(tz) From b90abd9a788b5e28230b8b39d7ffa5b2a34ed130 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 11 Nov 2018 20:35:28 -0800 Subject: [PATCH 05/11] Add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 73fd526640212..38cbf8b52e060 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1073,6 +1073,7 @@ Performance Improvements - Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`) - Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`) - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`) +- Imporved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive datetimes (:issue:`23634`) .. _whatsnew_0240.docs: From 35fac22a795e209da3cbc8199d9c00b7c7b74961 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 11 Nov 2018 22:02:47 -0800 Subject: [PATCH 06/11] typo --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 38cbf8b52e060..cb8166e1027b3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1073,7 +1073,7 @@ Performance Improvements - Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`) - Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`) - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`) -- Imporved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive datetimes (:issue:`23634`) +- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive datetimes (:issue:`23634`) .. _whatsnew_0240.docs: From 0b3a664659891fb7a3588304edd014657016111b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 12 Nov 2018 21:37:58 -0800 Subject: [PATCH 07/11] collect day_ns useage into day_s --- pandas/_libs/tslibs/conversion.pyx | 8 ++++---- pandas/_libs/tslibs/fields.pyx | 4 ++-- pandas/_libs/tslibs/np_datetime.pxd | 1 + pandas/_libs/tslibs/np_datetime.pyx | 6 ++++++ pandas/_libs/tslibs/timedeltas.pyx | 8 +++----- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fbc738a101d4c..622f9fbeae518 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -22,7 +22,8 @@ from np_datetime cimport (check_dts_bounds, npy_datetime, dt64_to_dtstruct, dtstruct_to_dt64, get_datetime64_unit, get_datetime64_value, - pydatetime_to_dt64, NPY_DATETIMEUNIT, NPY_FR_ns) + pydatetime_to_dt64, NPY_DATETIMEUNIT, NPY_FR_ns, + DAY_S) from np_datetime import OutOfBoundsDatetime from util cimport (is_string_object, @@ -41,7 +42,6 @@ from nattype cimport NPY_NAT, checknull_with_nat # ---------------------------------------------------------------------- # Constants -cdef int64_t DAY_NS = 86400000000000LL cdef int64_t HOURS_NS = 3600000000000 NS_DTYPE = np.dtype('M8[ns]') TD_DTYPE = np.dtype('m8[ns]') @@ -931,10 +931,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result_b[:] = NPY_NAT idx_shifted_left = (np.maximum(0, trans.searchsorted( - vals - DAY_NS, side='right') - 1)).astype(np.int64) + vals - DAY_S * 1000000000, side='right') - 1)).astype(np.int64) idx_shifted_right = (np.maximum(0, trans.searchsorted( - vals + DAY_NS, side='right') - 1)).astype(np.int64) + vals + DAY_S * 1000000000, side='right') - 1)).astype(np.int64) for i in range(n): val = vals[i] diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 4c0af69d72517..0502bcb4bb7e8 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -16,7 +16,7 @@ from ccalendar import get_locale_names, MONTHS_FULL, DAYS_FULL from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek, get_week_of_year, get_day_of_year) from np_datetime cimport (npy_datetimestruct, pandas_timedeltastruct, - dt64_to_dtstruct, td64_to_tdstruct) + dt64_to_dtstruct, td64_to_tdstruct, DAY_S) from nattype cimport NPY_NAT @@ -36,7 +36,7 @@ def get_time_micros(ndarray[int64_t] dtindex): cdef: ndarray[int64_t] micros - micros = np.mod(dtindex, 86400000000000, dtype=np.int64) // 1000LL + micros = np.mod(dtindex, DAY_S * 1000000000, dtype=np.int64) // 1000LL return micros diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 803c8cb18e3d5..4bd057303c6fa 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,6 +54,7 @@ cdef extern from "src/datetime/np_datetime.h": NPY_DATETIMEUNIT fr, npy_datetimestruct *result) nogil +cdef int64_t DAY_S cdef int reverse_ops[6] diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 61d0f697e7fe4..04db9e032c48e 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -37,6 +37,12 @@ cdef extern from "src/datetime/np_datetime_strings.h": npy_datetimestruct *out, int *out_local, int *out_tzoffset) +# ---------------------------------------------------------------------- +# time constants + +cdef int64_t DAY_S = 86400 + + # ---------------------------------------------------------------------- # numpy object inspection diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index ca8491726a5f7..54823dd491221 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -29,7 +29,7 @@ from util cimport (is_timedelta64_object, is_datetime64_object, is_string_object) from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, - pandas_timedeltastruct) + pandas_timedeltastruct, DAY_S) from nattype import nat_strings, NaT from nattype cimport checknull_with_nat, NPY_NAT @@ -38,8 +38,6 @@ from offsets cimport to_offset # ---------------------------------------------------------------------- # Constants -cdef int64_t DAY_NS = 86400000000000LL - # components named tuple Components = collections.namedtuple('Components', [ 'days', 'hours', 'minutes', 'seconds', @@ -266,10 +264,10 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: m = 1000000000L * 2629746 p = 9 elif unit == 'W': - m = 1000000000L * 86400 * 7 + m = 1000000000L * DAY_S * 7 p = 9 elif unit == 'D' or unit == 'd': - m = 1000000000L * 86400 + m = 1000000000L * DAY_S p = 9 elif unit == 'h': m = 1000000000L * 3600 From 52a7eb2126ee8ea05efef0816af27785dfdebea9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 14 Nov 2018 12:42:09 -0800 Subject: [PATCH 08/11] Move constant to ccalendar --- pandas/_libs/tslibs/ccalendar.pyx | 2 ++ pandas/_libs/tslibs/conversion.pyx | 9 +++++---- pandas/_libs/tslibs/fields.pyx | 7 ++++--- pandas/_libs/tslibs/np_datetime.pxd | 1 - pandas/_libs/tslibs/np_datetime.pyx | 5 ----- pandas/_libs/tslibs/timedeltas.pyx | 8 +++++--- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/core/arrays/datetimes.py | 4 ++-- 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 07c146c06b510..35588d34d5143 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -49,6 +49,8 @@ DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', int_to_weekday = {num: name for num, name in enumerate(DAYS)} weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} +DAY_SECONDS = 86400 + # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 622f9fbeae518..ea9a48c7c06d5 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -16,14 +16,15 @@ from cpython.datetime cimport (datetime, tzinfo, PyDateTime_CheckExact, PyDateTime_IMPORT) PyDateTime_IMPORT +from ccalendar import DAY_SECONDS + from np_datetime cimport (check_dts_bounds, npy_datetimestruct, pandas_datetime_to_datetimestruct, _string_to_dts, npy_datetime, dt64_to_dtstruct, dtstruct_to_dt64, get_datetime64_unit, get_datetime64_value, - pydatetime_to_dt64, NPY_DATETIMEUNIT, NPY_FR_ns, - DAY_S) + pydatetime_to_dt64, NPY_DATETIMEUNIT, NPY_FR_ns) from np_datetime import OutOfBoundsDatetime from util cimport (is_string_object, @@ -931,10 +932,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result_b[:] = NPY_NAT idx_shifted_left = (np.maximum(0, trans.searchsorted( - vals - DAY_S * 1000000000, side='right') - 1)).astype(np.int64) + vals - DAY_SECONDS * 1000000000, side='right') - 1)).astype(np.int64) idx_shifted_right = (np.maximum(0, trans.searchsorted( - vals + DAY_S * 1000000000, side='right') - 1)).astype(np.int64) + vals + DAY_SECONDS * 1000000000, side='right') - 1)).astype(np.int64) for i in range(n): val = vals[i] diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 0502bcb4bb7e8..72157c2fcb2f3 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -12,11 +12,11 @@ cimport numpy as cnp from numpy cimport ndarray, int64_t, int32_t, int8_t cnp.import_array() -from ccalendar import get_locale_names, MONTHS_FULL, DAYS_FULL +from ccalendar import get_locale_names, MONTHS_FULL, DAYS_FULL, DAY_SECONDS from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek, get_week_of_year, get_day_of_year) from np_datetime cimport (npy_datetimestruct, pandas_timedeltastruct, - dt64_to_dtstruct, td64_to_tdstruct, DAY_S) + dt64_to_dtstruct, td64_to_tdstruct) from nattype cimport NPY_NAT @@ -36,7 +36,8 @@ def get_time_micros(ndarray[int64_t] dtindex): cdef: ndarray[int64_t] micros - micros = np.mod(dtindex, DAY_S * 1000000000, dtype=np.int64) // 1000LL + micros = np.mod(dtindex, DAY_SECONDS * 1000000000, dtype=np.int64) + micros //= 1000LL return micros diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 4bd057303c6fa..803c8cb18e3d5 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,7 +54,6 @@ cdef extern from "src/datetime/np_datetime.h": NPY_DATETIMEUNIT fr, npy_datetimestruct *result) nogil -cdef int64_t DAY_S cdef int reverse_ops[6] diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 04db9e032c48e..dbbe9da381f0a 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -37,11 +37,6 @@ cdef extern from "src/datetime/np_datetime_strings.h": npy_datetimestruct *out, int *out_local, int *out_tzoffset) -# ---------------------------------------------------------------------- -# time constants - -cdef int64_t DAY_S = 86400 - # ---------------------------------------------------------------------- # numpy object inspection diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 54823dd491221..4d612a6f43107 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -28,8 +28,10 @@ from util cimport (is_timedelta64_object, is_datetime64_object, is_integer_object, is_float_object, is_string_object) +from ccalendar import DAY_SECONDS + from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, - pandas_timedeltastruct, DAY_S) + pandas_timedeltastruct) from nattype import nat_strings, NaT from nattype cimport checknull_with_nat, NPY_NAT @@ -264,10 +266,10 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: m = 1000000000L * 2629746 p = 9 elif unit == 'W': - m = 1000000000L * DAY_S * 7 + m = 1000000000L * DAY_SECONDS * 7 p = 9 elif unit == 'D' or unit == 'd': - m = 1000000000L * DAY_S + m = 1000000000L * DAY_SECONDS p = 9 elif unit == 'h': m = 1000000000L * 3600 diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 992340e16490c..3a51db185af09 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -21,6 +21,7 @@ from util cimport (is_datetime64_object, is_timedelta64_object, is_offset_object) cimport ccalendar +from ccalendar import DAY_SECONDS from conversion import tz_localize_to_utc, normalize_i8_timestamps from conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) @@ -40,7 +41,6 @@ from timezones cimport ( # Constants _zero_time = datetime_time(0, 0) _no_input = object() -cdef int64_t DAY_NS = 86400000000000 # ---------------------------------------------------------------------- @@ -1287,6 +1287,7 @@ class Timestamp(_Timestamp): tz information. """ if self.tz is None: + DAY_NS = DAY_SECONDS * 1000000000 return Timestamp(self.value - (self.value % DAY_NS)) normalized_value = normalize_i8_timestamps( np.array([self.value], dtype='i8'), tz=self.tz)[0] diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4c874fd1f4a1d..af6a7fc2e2dd8 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -8,7 +8,7 @@ from pandas._libs import lib, tslib from pandas._libs.tslib import Timestamp, NaT, iNaT from pandas._libs.tslibs import ( - normalize_date, + ccalendar, normalize_date, conversion, fields, timezones, resolution as libresolution) @@ -852,7 +852,7 @@ def normalize(self): """ if self.tz is None: not_null = self.notnull() - DAY_NS = 86400000000000 + DAY_NS = ccalendar.DAY_SECONDS * 1000000000 new_values = self.asi8.copy() adjustment = (new_values[not_null] % DAY_NS) new_values[not_null] = new_values[not_null] - adjustment From 0ece20817f7e4817899e09c7e7c8f41dfc9e2f65 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 16 Nov 2018 11:37:15 -0800 Subject: [PATCH 09/11] move HOUR_NS --- pandas/_libs/tslibs/ccalendar.pyx | 1 + pandas/_libs/tslibs/conversion.pyx | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 35588d34d5143..587213049af85 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -50,6 +50,7 @@ int_to_weekday = {num: name for num, name in enumerate(DAYS)} weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} DAY_SECONDS = 86400 +HOUR_SECONDS = 3600 # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ea9a48c7c06d5..c2897e1d0e8c8 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -16,7 +16,7 @@ from cpython.datetime cimport (datetime, tzinfo, PyDateTime_CheckExact, PyDateTime_IMPORT) PyDateTime_IMPORT -from ccalendar import DAY_SECONDS +from ccalendar import DAY_SECONDS, HOUR_SECONDS from np_datetime cimport (check_dts_bounds, npy_datetimestruct, @@ -43,7 +43,6 @@ from nattype cimport NPY_NAT, checknull_with_nat # ---------------------------------------------------------------------- # Constants -cdef int64_t HOURS_NS = 3600000000000 NS_DTYPE = np.dtype('M8[ns]') TD_DTYPE = np.dtype('m8[ns]') @@ -876,6 +875,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t *tdata int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins + int64_t HOURS_NS = HOUR_SECONDS * 1000000000 ndarray[int64_t] result, result_a, result_b, dst_hours npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False From bc5571acf61c2e1a8806f44438514f1ceb884b17 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 16 Nov 2018 15:03:21 -0800 Subject: [PATCH 10/11] Add performance for UTC timestamps --- asv_bench/benchmarks/timestamp.py | 2 +- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/_libs/tslibs/timestamps.pyx | 5 +++-- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/core/arrays/datetimes.py | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index ef7d486bf689c..a0f3bfa6298f3 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -87,7 +87,7 @@ def time_microsecond(self, tz, freq): class TimestampOps(object): - params = [None, 'US/Eastern'] + params = [None, 'US/Eastern', 'UTC'] param_names = ['tz'] def setup(self, tz): diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 45852813a5f20..2a3a5f744943b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1134,7 +1134,7 @@ Performance Improvements - Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`) - Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`) - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`) -- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive datetimes (:issue:`23634`) +- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`) .. _whatsnew_0240.docs: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3a51db185af09..b25947dca30c4 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1286,9 +1286,10 @@ class Timestamp(_Timestamp): Normalize Timestamp to midnight, preserving tz information. """ - if self.tz is None: + if self.tz is None or is_utc(self.tz): DAY_NS = DAY_SECONDS * 1000000000 - return Timestamp(self.value - (self.value % DAY_NS)) + normalized_value = self.value - (self.value % DAY_NS) + return Timestamp(normalized_value).tz_localize(self.tz) normalized_value = normalize_i8_timestamps( np.array([self.value], dtype='i8'), tz=self.tz)[0] return Timestamp(normalized_value).tz_localize(self.tz) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 8965b46f747c4..50c4a41f97a82 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -cdef bint is_utc(object tz) +cpdef bint is_utc(object tz) cdef bint is_tzlocal(object tz) cdef bint treat_tz_as_pytz(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 19a00fbd37dd5..a2a40a8aa1ca4 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -27,7 +27,7 @@ cdef int64_t NPY_NAT = get_nat() # ---------------------------------------------------------------------- -cdef inline bint is_utc(object tz): +cpdef inline bint is_utc(object tz): return tz is UTC or isinstance(tz, _dateutil_tzutc) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 31c1231498aaa..a226ac0f21d2c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -853,7 +853,7 @@ def normalize(self): '2014-08-01 00:00:00+05:30'], dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ - if self.tz is None: + if self.tz is None or timezones.is_utc(self.tz): not_null = self.notnull() DAY_NS = ccalendar.DAY_SECONDS * 1000000000 new_values = self.asi8.copy() From fb11dcfb3af96b2c47df5680bd5e9d6af33d60b4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Nov 2018 16:58:24 -0800 Subject: [PATCH 11/11] use notna instead --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c25eb2a23862f..b3a9973e50ff0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -854,7 +854,7 @@ def normalize(self): dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ if self.tz is None or timezones.is_utc(self.tz): - not_null = self.notnull() + not_null = self.notna() DAY_NS = ccalendar.DAY_SECONDS * 1000000000 new_values = self.asi8.copy() adjustment = (new_values[not_null] % DAY_NS)