From 903237af19c746bbe134f9d161d62182d4a8cc4d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 11 Jun 2022 11:37:29 -0700 Subject: [PATCH 1/7] ENH: Timestamp +- timedeltalike scalar support non-nano --- pandas/_libs/tslibs/timestamps.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 1d21f602fac05..d6c6644f7630d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -400,10 +400,14 @@ cdef class _Timestamp(ABCTimestamp): new_value = int(self.value) + int(nanos) try: - result = type(self)._from_value_and_reso(new_value, reso=self._reso, tz=self.tzinfo) + result = type(self)._from_value_and_reso( + new_value, reso=self._reso, tz=self.tzinfo + ) except OverflowError as err: # TODO: don't hard-code nanosecond here - raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {new_value}") from err + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {new_value}" + ) from err if result is not NaT: result._set_freq(self._freq) # avoid warning in constructor From b0316a6fd042023830cd4dcb149e02192883f91f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Jun 2022 20:23:26 -0700 Subject: [PATCH 2/7] ENH: Timestamp.__sub__(datetime) with non-nano --- pandas/_libs/tslibs/timestamps.pyx | 19 +++--- .../tests/scalar/timestamp/test_timestamp.py | 60 +++++++++++++++++++ 2 files changed, 69 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d6c6644f7630d..8edfa789ec807 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -413,9 +413,6 @@ cdef class _Timestamp(ABCTimestamp): result._set_freq(self._freq) # avoid warning in constructor return result - elif isinstance(self, _Timestamp) and self._reso != NPY_FR_ns: - raise NotImplementedError(self._reso) - elif is_integer_object(other): raise integer_op_not_supported(self) @@ -450,9 +447,6 @@ cdef class _Timestamp(ABCTimestamp): neg_other = -other return self + neg_other - elif isinstance(self, _Timestamp) and self._reso != NPY_FR_ns: - raise NotImplementedError(self._reso) - elif is_array(other): if other.dtype.kind in ['i', 'u']: raise integer_op_not_supported(self) @@ -483,10 +477,18 @@ cdef class _Timestamp(ABCTimestamp): "Cannot subtract tz-naive and tz-aware datetime-like objects." ) + # We allow silent casting to the lower resolution if and only + # if it is lossless. 
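+        # E.g. a second-resolution minus a microsecond-resolution Timestamp
+        # casts the microsecond operand down to seconds; _as_reso with
+        # round_ok=False raises if the cast would drop sub-second values.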
+ if self._reso < other._reso: + other = (<_Timestamp>other)._as_reso(self._reso, round_ok=False) + elif self._reso > other._reso: + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) + # scalar Timestamp/datetime - Timestamp/datetime -> yields a # Timedelta try: - return Timedelta(self.value - other.value) + res_value = self.value - other.value + return Timedelta._from_value_and_reso(res_value, self._reso) except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err: if isinstance(other, _Timestamp): if both_timestamps: @@ -507,9 +509,6 @@ cdef class _Timestamp(ABCTimestamp): return NotImplemented def __rsub__(self, other): - if self._reso != NPY_FR_ns: - raise NotImplementedError(self._reso) - if PyDateTime_Check(other): try: return type(self)(other) - self diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 79c8a300b34e3..db9c75eeb7485 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -22,6 +22,7 @@ from pandas._libs.tslibs.timezones import ( dateutil_gettz as gettz, get_timezone, + maybe_get_tz, tz_compare, ) from pandas.errors import OutOfBoundsDatetime @@ -712,6 +713,11 @@ def dt64(self, reso): def ts(self, dt64): return Timestamp._from_dt64(dt64) + @pytest.fixture + def ts_tz(self, ts, tz_aware_fixture): + tz = maybe_get_tz(tz_aware_fixture) + return Timestamp._from_value_and_reso(ts.value, ts._reso, tz) + def test_non_nano_construction(self, dt64, ts, reso): assert ts.value == dt64.view("i8") @@ -893,6 +899,60 @@ def test_addsub_timedeltalike_non_nano(self, dt64, ts, td): assert result._reso == ts._reso assert result == expected + @pytest.mark.xfail(reason="tz_localize not yet implemented for non-nano") + def test_addsub_offset(self, ts_tz): + # specifically non-Tick offset + off = offsets.YearBegin(1) + result = ts_tz + off + + assert isinstance(result, Timestamp) + assert result._reso == ts_tz._reso + # If ts_tz is ever on the last day of the year, the year would be + # incremented by one + assert result.year == ts_tz.year + assert result.day == 31 + assert result.month == 12 + assert tz_compare(result.tz, ts_tz.tz) + + def test_sub_datetimelike_mismatched_reso(self, ts_tz): + # case with non-lossy rounding + ts = ts_tz + + # choose a unit for `other` that doesn't match ts_tz's + unit = { + NpyDatetimeUnit.NPY_FR_us.value: "ms", + NpyDatetimeUnit.NPY_FR_ms.value: "s", + NpyDatetimeUnit.NPY_FR_s.value: "us", + }[ts._reso] + other = ts._as_unit(unit) + assert other._reso != ts._reso + + result = ts - other + assert isinstance(result, Timedelta) + assert result.value == 0 + assert result._reso == min(ts._reso, other._reso) + + result = other - ts + assert isinstance(result, Timedelta) + assert result.value == 0 + assert result._reso == min(ts._reso, other._reso) + + # TODO: clarify in message that add/sub is allowed only when lossless? 
+ msg = "Cannot losslessly convert units" + if ts._reso < other._reso: + # Case where rounding is lossy + other2 = other + Timedelta._from_value_and_reso(1, other._reso) + with pytest.raises(ValueError, match=msg): + ts - other2 + with pytest.raises(ValueError, match=msg): + other2 - ts + else: + ts2 = ts + Timedelta._from_value_and_reso(1, ts._reso) + with pytest.raises(ValueError, match=msg): + ts2 - other + with pytest.raises(ValueError, match=msg): + other - ts2 + class TestAsUnit: def test_as_unit(self): From ecc7d80826dcfbec112c3594d65156f01092b47e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Jun 2022 13:09:15 -0700 Subject: [PATCH 3/7] better exception message --- pandas/_libs/tslibs/timestamps.pyx | 15 +++++++++++---- pandas/tests/scalar/timestamp/test_timestamp.py | 16 +++++++++++++--- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8edfa789ec807..da2377a9b085c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -479,10 +479,17 @@ cdef class _Timestamp(ABCTimestamp): # We allow silent casting to the lower resolution if and only # if it is lossless. - if self._reso < other._reso: - other = (<_Timestamp>other)._as_reso(self._reso, round_ok=False) - elif self._reso > other._reso: - self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) + try: + if self._reso < other._reso: + other = (<_Timestamp>other)._as_reso(self._reso, round_ok=False) + elif self._reso > other._reso: + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) + except ValueError as err: + raise ValueError( + "Timestamp subtraction with mismatched resolutions is not " + "allowed when casting to the lower resolution would require " + "lossy rounding." + ) from err # scalar Timestamp/datetime - Timestamp/datetime -> yields a # Timedelta diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index db9c75eeb7485..f7f19e49d0bac 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -914,11 +914,22 @@ def test_addsub_offset(self, ts_tz): assert result.month == 12 assert tz_compare(result.tz, ts_tz.tz) + result = ts_tz - off + + assert isinstance(result, Timestamp) + assert result._reso == ts_tz._reso + assert result.year == ts_tz.year - 1 + assert result.day == 31 + assert result.month == 12 + assert tz_compare(result.tz, ts_tz.tz) + def test_sub_datetimelike_mismatched_reso(self, ts_tz): # case with non-lossy rounding ts = ts_tz - # choose a unit for `other` that doesn't match ts_tz's + # choose a unit for `other` that doesn't match ts_tz's; + # this construction ensures we get cases with other._reso < ts._reso + # and cases with other._reso > ts._reso unit = { NpyDatetimeUnit.NPY_FR_us.value: "ms", NpyDatetimeUnit.NPY_FR_ms.value: "s", @@ -937,8 +948,7 @@ def test_sub_datetimelike_mismatched_reso(self, ts_tz): assert result.value == 0 assert result._reso == min(ts._reso, other._reso) - # TODO: clarify in message that add/sub is allowed only when lossless? 
- msg = "Cannot losslessly convert units" + msg = "Timestamp subtraction with mismatched resolutions" if ts._reso < other._reso: # Case where rounding is lossy other2 = other + Timedelta._from_value_and_reso(1, other._reso) From d6df87c0f87b30a406d1583940efd87390134bbe Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 14 Jun 2022 18:32:19 +0200 Subject: [PATCH 4/7] BUG: concat not sorting mixed column names when None is included (#47331) * REGR: concat not sorting columns for mixed column names * Fix none in columns * BUG: concat not sorting column names when None is included * Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Matthew Roeschke * Add gh reference Co-authored-by: Matthew Roeschke --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/algorithms.py | 7 +++++-- pandas/tests/reshape/concat/test_concat.py | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 5891eeea98cbb..681139fb51272 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -924,6 +924,7 @@ Reshaping - Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`) - Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`) - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) +- Bug in :func:`concat` not sorting the column names when ``None`` is included (:issue:`47331`) - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) - Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 888e943488953..cf73fd7c8929e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1771,9 +1771,12 @@ def safe_sort( def _sort_mixed(values) -> np.ndarray: """order ints before strings in 1d arrays, safe in py3""" str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) - nums = np.sort(values[~str_pos]) + none_pos = np.array([x is None for x in values], dtype=bool) + nums = np.sort(values[~str_pos & ~none_pos]) strs = np.sort(values[str_pos]) - return np.concatenate([nums, np.asarray(strs, dtype=object)]) + return np.concatenate( + [nums, np.asarray(strs, dtype=object), np.array(values[none_pos])] + ) def _sort_tuples(values: np.ndarray) -> np.ndarray: diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index eb44b4889afb8..17c797fc36159 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -469,12 +469,12 @@ def __iter__(self): tm.assert_frame_equal(concat(CustomIterator2(), ignore_index=True), expected) def test_concat_order(self): - # GH 17344 + # GH 17344, GH#47331 dfs = [DataFrame(index=range(3), columns=["a", 1, None])] - dfs += [DataFrame(index=range(3), columns=[None, 1, "a"]) for i in range(100)] + dfs += [DataFrame(index=range(3), columns=[None, 1, "a"]) for _ in range(100)] result = concat(dfs, sort=True).columns - expected = dfs[0].columns + expected = Index([1, "a", None]) 
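+        # ints sort first, then strings, then None (per _sort_mixed above)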
tm.assert_index_equal(result, expected) def test_concat_different_extension_dtypes_upcasts(self): From 16b39ae56719f74a08a782ecc5deb8ba784db59a Mon Sep 17 00:00:00 2001 From: Jonas Haag Date: Tue, 14 Jun 2022 18:49:16 +0200 Subject: [PATCH 5/7] Add run-tests action (#47292) * Add run-tests action * Fix * Fix * Fix * Update macos-windows.yml * Update posix.yml * Update python-dev.yml * Update action.yml * Update macos-windows.yml * Update posix.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml --- .github/actions/run-tests/action.yml | 27 +++++++++++++++++++ .github/workflows/macos-windows.yml | 16 +---------- .github/workflows/posix.yml | 19 +------------ .github/workflows/python-dev.yml | 40 +++++++--------------------- 4 files changed, 39 insertions(+), 63 deletions(-) create mode 100644 .github/actions/run-tests/action.yml diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml new file mode 100644 index 0000000000000..2a7601f196ec4 --- /dev/null +++ b/.github/actions/run-tests/action.yml @@ -0,0 +1,27 @@ +name: Run tests and report results +runs: + using: composite + steps: + - name: Test + run: ci/run_tests.sh + shell: bash -el {0} + + - name: Publish test results + uses: actions/upload-artifact@v2 + with: + name: Test results + path: test-data.xml + if: failure() + + - name: Report Coverage + run: coverage report -m + shell: bash -el {0} + if: failure() + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v2 + with: + flags: unittests + name: codecov-pandas + fail_ci_if_error: false + if: failure() diff --git a/.github/workflows/macos-windows.yml b/.github/workflows/macos-windows.yml index 26e6c8699ca64..4c48d83b68947 100644 --- a/.github/workflows/macos-windows.yml +++ b/.github/workflows/macos-windows.yml @@ -53,18 +53,4 @@ jobs: uses: ./.github/actions/build_pandas - name: Test - run: ci/run_tests.sh - - - name: Publish test results - uses: actions/upload-artifact@v3 - with: - name: Test results - path: test-data.xml - if: failure() - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 - with: - flags: unittests - name: codecov-pandas - fail_ci_if_error: false + uses: ./.github/actions/run-tests diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 061b2b361ca62..831bbd8bb3233 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -157,23 +157,6 @@ jobs: uses: ./.github/actions/build_pandas - name: Test - run: ci/run_tests.sh + uses: ./.github/actions/run-tests # TODO: Don't continue on error for PyPy continue-on-error: ${{ env.IS_PYPY == 'true' }} - - - name: Build Version - run: conda list - - - name: Publish test results - uses: actions/upload-artifact@v3 - with: - name: Test results - path: test-data.xml - if: failure() - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 - with: - flags: unittests - name: codecov-pandas - fail_ci_if_error: false diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index 753e288f5e391..09639acafbba1 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -57,40 +57,20 @@ jobs: - name: Install dependencies shell: bash -el {0} run: | - python -m pip install --upgrade pip setuptools wheel - pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy - pip install 
git+https://github.com/nedbat/coveragepy.git - pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov - pip list + python3 -m pip install --upgrade pip setuptools wheel + python3 -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy + python3 -m pip install git+https://github.com/nedbat/coveragepy.git + python3 -m pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17 + python3 -m pip list - name: Build Pandas run: | - python setup.py build_ext -q -j2 - python -m pip install -e . --no-build-isolation --no-use-pep517 + python3 setup.py build_ext -q -j2 + python3 -m pip install -e . --no-build-isolation --no-use-pep517 - name: Build Version run: | - python -c "import pandas; pandas.show_versions();" + python3 -c "import pandas; pandas.show_versions();" - - name: Test with pytest - shell: bash -el {0} - run: | - ci/run_tests.sh - - - name: Publish test results - uses: actions/upload-artifact@v3 - with: - name: Test results - path: test-data.xml - if: failure() - - - name: Report Coverage - run: | - coverage report -m - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 - with: - flags: unittests - name: codecov-pandas - fail_ci_if_error: true + - name: Test + uses: ./.github/actions/run-tests From 50fa2f65346a6180a3fd7dc4f6def13244f5429c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Jun 2022 09:54:15 -0700 Subject: [PATCH 6/7] ENH: Timestamp pickle support non-nano tzaware (#47340) --- pandas/_libs/tslibs/timestamps.pyx | 9 +-------- pandas/tests/scalar/timestamp/test_timestamp.py | 5 ++++- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index da2377a9b085c..2694991b54d4a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -157,14 +157,7 @@ cdef inline _Timestamp create_timestamp_from_ts( def _unpickle_timestamp(value, freq, tz, reso=NPY_FR_ns): # GH#41949 dont warn on unpickle if we have a freq - if reso == NPY_FR_ns: - ts = Timestamp(value, tz=tz) - else: - if tz is not None: - raise NotImplementedError - abbrev = npy_unit_to_abbrev(reso) - dt64 = np.datetime64(value, abbrev) - ts = Timestamp._from_dt64(dt64) + ts = Timestamp._from_value_and_reso(value, reso, tz) ts._set_freq(freq) return ts diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index f7f19e49d0bac..a02268956651c 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -842,7 +842,10 @@ def test_cmp_cross_reso_reversed_dt64(self): assert other.asm8 < ts - def test_pickle(self, ts): + def test_pickle(self, ts, tz_aware_fixture): + tz = tz_aware_fixture + tz = maybe_get_tz(tz) + ts = Timestamp._from_value_and_reso(ts.value, ts._reso, tz) rt = tm.round_trip_pickle(ts) assert rt._reso == ts._reso assert rt == ts From 55c216b0b3c385308b32fa8c63895080e71153b5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Jun 2022 16:23:23 -0700 Subject: [PATCH 7/7] ENH: Timestamp.tz_localize support non-nano --- pandas/_libs/tslibs/ccalendar.pxd | 2 - pandas/_libs/tslibs/ccalendar.pyx | 5 -- pandas/_libs/tslibs/period.pyx | 1 - pandas/_libs/tslibs/timestamps.pyx | 15 ++--- pandas/_libs/tslibs/tzconversion.pyx | 67 +++++++++++++------ pandas/_libs/tslibs/vectorized.pyx | 1 - .../tests/scalar/timestamp/test_timezones.py | 35 +++++++--- 
.../tests/scalar/timestamp/test_unary_ops.py | 46 ++++++++++--- 8 files changed, 113 insertions(+), 59 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 511c9f94a47d8..341f2176f5eb4 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -15,8 +15,6 @@ cpdef int32_t get_day_of_year(int year, int month, int day) nogil cpdef int get_lastbday(int year, int month) nogil cpdef int get_firstbday(int year, int month) nogil -cdef int64_t DAY_NANOS -cdef int64_t HOUR_NANOS cdef dict c_MONTH_NUMBERS cdef int32_t* month_offset diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index ff6f1721ca6c9..00ee15b73f551 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -47,11 +47,6 @@ DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', int_to_weekday = {num: name for num, name in enumerate(DAYS)} weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} -DAY_SECONDS = 86400 -HOUR_SECONDS = 3600 - -cdef const int64_t DAY_NANOS = DAY_SECONDS * 1_000_000_000 -cdef const int64_t HOUR_NANOS = HOUR_SECONDS * 1_000_000_000 # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 0c05037097839..e32b0fd2bba3f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -60,7 +60,6 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.ccalendar cimport ( - c_MONTH_NUMBERS, dayofweek, get_day_of_year, get_days_in_month, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2694991b54d4a..711d10222c133 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -84,13 +84,13 @@ from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, cmp_dtstructs, cmp_scalar, - dt64_to_dtstruct, get_datetime64_unit, get_datetime64_value, get_unit_from_dtype, npy_datetimestruct, + npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, - pydatetime_to_dt64, + pydatetime_to_dtstruct, ) from pandas._libs.tslibs.np_datetime import ( @@ -530,7 +530,8 @@ cdef class _Timestamp(ABCTimestamp): npy_datetimestruct dts if own_tz is not None and not is_utc(own_tz): - val = pydatetime_to_dt64(self, &dts) + self.nanosecond + pydatetime_to_dtstruct(self, &dts) + val = npy_datetimestruct_to_datetime(self._reso, &dts) + self.nanosecond else: val = self.value return val @@ -2044,11 +2045,6 @@ default 'raise' >>> pd.NaT.tz_localize() NaT """ - if self._reso != NPY_FR_ns: - if tz is None and self.tz is None: - return self - raise NotImplementedError(self._reso) - if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') @@ -2077,7 +2073,7 @@ default 'raise' "Cannot localize tz-aware Timestamp, use tz_convert for conversions" ) - out = Timestamp(value, tz=tz) + out = type(self)._from_value_and_reso(value, self._reso, tz=tz) if out is not NaT: out._set_freq(self._freq) # avoid warning in constructor return out @@ -2124,7 +2120,6 @@ default 'raise' >>> pd.NaT.tz_convert(tz='Asia/Tokyo') NaT """ - if self.tzinfo is None: # tz naive, use tz_localize raise TypeError( diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 7657633c7215a..dffe02ef15148 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ 
b/pandas/_libs/tslibs/tzconversion.pyx @@ -27,11 +27,10 @@ from numpy cimport ( cnp.import_array() -from pandas._libs.tslibs.ccalendar cimport ( - DAY_NANOS, - HOUR_NANOS, +from pandas._libs.tslibs.dtypes cimport ( + periods_per_day, + periods_per_second, ) -from pandas._libs.tslibs.dtypes cimport periods_per_second from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, @@ -153,6 +152,7 @@ cdef int64_t tz_localize_to_utc_single( return val elif is_utc(tz) or tz is None: + # TODO: test with non-nano return val elif is_tzlocal(tz) or is_zoneinfo(tz): @@ -161,6 +161,15 @@ cdef int64_t tz_localize_to_utc_single( elif is_fixed_offset(tz): _, deltas, _ = get_dst_info(tz) delta = deltas[0] + # TODO: de-duplicate with Localizer.__init__ + if reso != NPY_DATETIMEUNIT.NPY_FR_ns: + if reso == NPY_DATETIMEUNIT.NPY_FR_us: + delta = delta // 1000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + delta = delta // 1_000_000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + delta = delta // 1_000_000_000 + return val - delta else: @@ -229,6 +238,7 @@ timedelta-like} bint fill_nonexist = False str stamp Localizer info = Localizer(tz, reso=reso) + int64_t pph = periods_per_day(reso) // 24 # Vectorized version of DstTzInfo.localize if info.use_utc: @@ -242,7 +252,9 @@ timedelta-like} if v == NPY_NAT: result[i] = NPY_NAT else: - result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True, reso=reso) + result[i] = v - _tz_localize_using_tzinfo_api( + v, tz, to_utc=True, reso=reso + ) return result.base # to return underlying ndarray elif info.use_fixed: @@ -283,7 +295,7 @@ timedelta-like} shift_backward = True elif PyDelta_Check(nonexistent): from .timedeltas import delta_to_nanoseconds - shift_delta = delta_to_nanoseconds(nonexistent) + shift_delta = delta_to_nanoseconds(nonexistent, reso=reso) elif nonexistent not in ('raise', None): msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " "shift_backwards} or a timedelta object") @@ -291,12 +303,14 @@ timedelta-like} # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) - result_a, result_b =_get_utc_bounds(vals, info.tdata, info.ntrans, info.deltas) + result_a, result_b =_get_utc_bounds( + vals, info.tdata, info.ntrans, info.deltas, reso=reso + ) # silence false-positive compiler warning dst_hours = np.empty(0, dtype=np.int64) if infer_dst: - dst_hours = _get_dst_hours(vals, result_a, result_b) + dst_hours = _get_dst_hours(vals, result_a, result_b, reso=reso) # Pre-compute delta_idx_offset that will be used if we go down non-existent # paths. 
@@ -316,12 +330,15 @@ timedelta-like} left = result_a[i] right = result_b[i] if val == NPY_NAT: + # TODO: test with non-nano result[i] = val elif left != NPY_NAT and right != NPY_NAT: if left == right: + # TODO: test with non-nano result[i] = left else: if infer_dst and dst_hours[i] != NPY_NAT: + # TODO: test with non-nano result[i] = dst_hours[i] elif is_dst: if ambiguous_array[i]: @@ -329,9 +346,10 @@ timedelta-like} else: result[i] = right elif fill: + # TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous result[i] = NPY_NAT else: - stamp = _render_tstamp(val) + stamp = _render_tstamp(val, reso=reso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp}, try using the " "'ambiguous' argument" @@ -339,23 +357,24 @@ timedelta-like} elif left != NPY_NAT: result[i] = left elif right != NPY_NAT: + # TODO: test with non-nano result[i] = right else: # Handle nonexistent times if shift_forward or shift_backward or shift_delta != 0: # Shift the nonexistent time to the closest existing time - remaining_mins = val % HOUR_NANOS + remaining_mins = val % pph if shift_delta != 0: # Validate that we don't relocalize on another nonexistent # time - if -1 < shift_delta + remaining_mins < HOUR_NANOS: + if -1 < shift_delta + remaining_mins < pph: raise ValueError( "The provided timedelta will relocalize on a " f"nonexistent time: {nonexistent}" ) new_local = val + shift_delta elif shift_forward: - new_local = val + (HOUR_NANOS - remaining_mins) + new_local = val + (pph - remaining_mins) else: # Subtract 1 since the beginning hour is _inclusive_ of # nonexistent times @@ -368,7 +387,7 @@ timedelta-like} elif fill_nonexist: result[i] = NPY_NAT else: - stamp = _render_tstamp(val) + stamp = _render_tstamp(val, reso=reso) raise pytz.NonExistentTimeError(stamp) return result.base # .base to get underlying ndarray @@ -404,10 +423,11 @@ cdef inline Py_ssize_t bisect_right_i8(int64_t *data, return left -cdef inline str _render_tstamp(int64_t val): +cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT reso): """ Helper function to render exception messages""" from pandas._libs.tslibs.timestamps import Timestamp - return str(Timestamp(val)) + ts = Timestamp._from_value_and_reso(val, reso, None) + return str(ts) cdef _get_utc_bounds( @@ -415,6 +435,7 @@ cdef _get_utc_bounds( int64_t* tdata, Py_ssize_t ntrans, const int64_t[::1] deltas, + NPY_DATETIMEUNIT reso, ): # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) @@ -424,6 +445,7 @@ cdef _get_utc_bounds( Py_ssize_t i, n = vals.size int64_t val, v_left, v_right Py_ssize_t isl, isr, pos_left, pos_right + int64_t ppd = periods_per_day(reso) result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) @@ -438,8 +460,8 @@ cdef _get_utc_bounds( if val == NPY_NAT: continue - # TODO: be careful of overflow in val-DAY_NANOS - isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1 + # TODO: be careful of overflow in val-ppd + isl = bisect_right_i8(tdata, val - ppd, ntrans) - 1 if isl < 0: isl = 0 @@ -449,8 +471,8 @@ cdef _get_utc_bounds( if v_left + deltas[pos_left] == val: result_a[i] = v_left - # TODO: be careful of overflow in val+DAY_NANOS - isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1 + # TODO: be careful of overflow in val+ppd + isr = bisect_right_i8(tdata, val + ppd, ntrans) - 1 if isr < 0: isr = 0 @@ -465,10 +487,11 @@ cdef _get_utc_bounds( 
@cython.boundscheck(False) cdef ndarray[int64_t] _get_dst_hours( - # vals only needed here to potential render an exception message + # vals, reso only needed here to potential render an exception message const int64_t[:] vals, ndarray[int64_t] result_a, ndarray[int64_t] result_b, + NPY_DATETIMEUNIT reso, ): cdef: Py_ssize_t i, n = vals.shape[0] @@ -497,7 +520,7 @@ cdef ndarray[int64_t] _get_dst_hours( if trans_idx.size == 1: # TODO: not reached in tests 2022-05-02; possible? - stamp = _render_tstamp(vals[trans_idx[0]]) + stamp = _render_tstamp(vals[trans_idx[0]], reso=reso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp} as there " "are no repeated times" @@ -519,7 +542,7 @@ cdef ndarray[int64_t] _get_dst_hours( delta = np.diff(result_a[grp]) if grp.size == 1 or np.all(delta > 0): # TODO: not reached in tests 2022-05-02; possible? - stamp = _render_tstamp(vals[grp[0]]) + stamp = _render_tstamp(vals[grp[0]], reso=reso) raise pytz.AmbiguousTimeError(stamp) # Find the index for the switch and pull from a for dst and b diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 75efe6d4113cf..6201c94ecc155 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -19,7 +19,6 @@ cnp.import_array() from .dtypes import Resolution -from .ccalendar cimport DAY_NANOS from .dtypes cimport ( c_Resolution, periods_per_day, diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index a7f7393fb3263..874575fa9ad4c 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -20,6 +20,7 @@ ) from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -57,10 +58,11 @@ def test_tz_localize_pushes_out_of_bounds(self): with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp.max.tz_localize("US/Pacific") - def test_tz_localize_ambiguous_bool(self): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_tz_localize_ambiguous_bool(self, unit): # make sure that we are correctly accepting bool values as ambiguous # GH#14402 - ts = Timestamp("2015-11-01 01:00:03") + ts = Timestamp("2015-11-01 01:00:03")._as_unit(unit) expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") @@ -70,9 +72,11 @@ def test_tz_localize_ambiguous_bool(self): result = ts.tz_localize("US/Central", ambiguous=True) assert result == expected0 + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = ts.tz_localize("US/Central", ambiguous=False) assert result == expected1 + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_tz_localize_ambiguous(self): ts = Timestamp("2014-11-02 01:00") @@ -245,17 +249,28 @@ def test_timestamp_tz_localize(self, tz): ], ) @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) def test_timestamp_tz_localize_nonexistent_shift( - self, start_ts, tz, end_ts, shift, tz_type + self, start_ts, tz, end_ts, shift, tz_type, unit ): # GH 8917, 24466 tz = tz_type + tz if isinstance(shift, str): shift = "shift_" + shift - ts = Timestamp(start_ts) + ts = Timestamp(start_ts)._as_unit(unit) result = ts.tz_localize(tz, nonexistent=shift) expected = Timestamp(end_ts).tz_localize(tz) - assert result 
== expected + + if unit == "us": + assert result == expected.replace(nanosecond=0) + elif unit == "ms": + micros = expected.microsecond - expected.microsecond % 1000 + assert result == expected.replace(microsecond=micros, nanosecond=0) + elif unit == "s": + assert result == expected.replace(microsecond=0, nanosecond=0) + else: + assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("offset", [-1, 1]) @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) @@ -268,16 +283,18 @@ def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): ts.tz_localize(tz, nonexistent=timedelta(seconds=offset)) @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) - def test_timestamp_tz_localize_nonexistent_NaT(self, tz): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_timestamp_tz_localize_nonexistent_NaT(self, tz, unit): # GH 8917 - ts = Timestamp("2015-03-29 02:20:00") + ts = Timestamp("2015-03-29 02:20:00")._as_unit(unit) result = ts.tz_localize(tz, nonexistent="NaT") assert result is NaT @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) - def test_timestamp_tz_localize_nonexistent_raise(self, tz): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_timestamp_tz_localize_nonexistent_raise(self, tz, unit): # GH 8917 - ts = Timestamp("2015-03-29 02:20:00") + ts = Timestamp("2015-03-29 02:20:00")._as_unit(unit) msg = "2015-03-29 02:20:00" with pytest.raises(pytz.NonExistentTimeError, match=msg): ts.tz_localize(tz, nonexistent="raise") diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 35065a3c9877c..4ac50e3f4e034 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -161,18 +161,30 @@ def test_floor(self): assert result == expected @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) - def test_round_dst_border_ambiguous(self, method): + @pytest.mark.parametrize( + "unit", + [ + "ns", + pytest.param("us", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("ms", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("s", marks=pytest.mark.xfail(reason="round not implemented")), + ], + ) + def test_round_dst_border_ambiguous(self, method, unit): # GH 18946 round near "fall back" DST ts = Timestamp("2017-10-29 00:00:00", tz="UTC").tz_convert("Europe/Madrid") + ts = ts._as_unit(unit) # result = getattr(ts, method)("H", ambiguous=True) assert result == ts + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous=False) expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( "Europe/Madrid" ) assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous="NaT") assert result is NaT @@ -189,12 +201,22 @@ def test_round_dst_border_ambiguous(self, method): ["floor", "2018-03-11 03:01:00-0500", "2H"], ], ) - def test_round_dst_border_nonexistent(self, method, ts_str, freq): + @pytest.mark.parametrize( + "unit", + [ + "ns", + pytest.param("us", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("ms", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("s", marks=pytest.mark.xfail(reason="round not implemented")), + ], + ) + def test_round_dst_border_nonexistent(self, 
method, ts_str, freq, unit): # GH 23324 round near "spring forward" DST - ts = Timestamp(ts_str, tz="America/Chicago") + ts = Timestamp(ts_str, tz="America/Chicago")._as_unit(unit) result = getattr(ts, method)(freq, nonexistent="shift_forward") expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)(freq, nonexistent="NaT") assert result is NaT @@ -466,35 +488,41 @@ def test_replace_across_dst(self, tz, normalize): ts2b = normalize(ts2) assert ts2 == ts2b - def test_replace_dst_border(self): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_replace_dst_border(self, unit): # Gh 7825 - t = Timestamp("2013-11-3", tz="America/Chicago") + t = Timestamp("2013-11-3", tz="America/Chicago")._as_unit(unit) result = t.replace(hour=3) expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) - def test_replace_dst_fold(self, fold, tz): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_replace_dst_fold(self, fold, tz, unit): # GH 25017 d = datetime(2019, 10, 27, 2, 30) - ts = Timestamp(d, tz=tz) + ts = Timestamp(d, tz=tz)._as_unit(unit) result = ts.replace(hour=1, fold=fold) expected = Timestamp(datetime(2019, 10, 27, 1, 30)).tz_localize( tz, ambiguous=not fold ) assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value # -------------------------------------------------------------- # Timestamp.normalize @pytest.mark.parametrize("arg", ["2013-11-30", "2013-11-30 12:00:00"]) - def test_normalize(self, tz_naive_fixture, arg): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_normalize(self, tz_naive_fixture, arg, unit): tz = tz_naive_fixture - ts = Timestamp(arg, tz=tz) + ts = Timestamp(arg, tz=tz)._as_unit(unit) result = ts.normalize() expected = Timestamp("2013-11-30", tz=tz) assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_normalize_pre_epoch_dates(self): # GH: 36294
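
A minimal end-to-end sketch of what this series enables, runnable only
against a dev build with all seven patches applied. `_as_unit`,
`_from_value_and_reso`, and `_reso` are the private helpers exercised by
the tests above, so treat the exact spellings as internal details rather
than stable API:

    import pickle

    from pandas import Timedelta, Timestamp

    # Patches 1-3: arithmetic on mixed-resolution Timestamps casts to the
    # coarser resolution when lossless and raises when it would round.
    ts = Timestamp("2022-06-14 10:30:00")._as_unit("s")   # second resolution
    other = ts._as_unit("us")                             # microsecond resolution

    td = ts - other
    assert isinstance(td, Timedelta)
    assert td.value == 0              # result carries the coarser resolution

    bumped = other + Timedelta._from_value_and_reso(1, other._reso)
    try:
        ts - bumped                   # would require lossy rounding
    except ValueError as err:
        assert "mismatched resolutions" in str(err)

    # Patch 7: tz_localize now preserves the non-nano resolution.
    local = ts.tz_localize("US/Central")
    assert local._reso == ts._reso

    # Patch 6: tz-aware non-nano Timestamps round-trip through pickle.
    assert pickle.loads(pickle.dumps(local)) == local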