From 9ae8c966c27c37e4a31ad29506dbe83f0a3bc530 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 30 Jan 2020 16:18:56 -0800 Subject: [PATCH 1/5] BUG: Period[us] start_time off by 1 nanosecond --- pandas/_libs/tslibs/period.pyx | 8 ++++++++ pandas/tests/scalar/period/test_asfreq.py | 13 ++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 3dd560ece188d..c7f6bc40f55c1 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1182,10 +1182,18 @@ cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: cdef: npy_datetimestruct dts + int64_t value if ordinal == NPY_NAT: return NPY_NAT + if freq == 11000: + # Microsecond, avoid get_date_info to prevent floating point errors + value = ordinal * 1000 + dt64_to_dtstruct(value, &dts) + check_dts_bounds(&dts) + return value + get_date_info(ordinal, freq, &dts) check_dts_bounds(&dts) return dtstruct_to_dt64(&dts) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 357274e724c68..0606b93db7eeb 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -3,7 +3,7 @@ from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG, _period_code_map from pandas.errors import OutOfBoundsDatetime -from pandas import Period, offsets +from pandas import Period, Timestamp, offsets class TestFreqConversion: @@ -656,6 +656,17 @@ def test_conv_secondly(self): assert ival_S.asfreq("S") == ival_S + def test_conv_microsecond(self): + # Avoid floating point errors dropping the start_time to before + # the beginning of the Period + per = Period("2020-01-30 15:57:27.576166", freq="U") + assert per.ordinal == 1580399847576166 + + start = per.start_time + expected = Timestamp("2020-01-30 15:57:27.576166") + assert start == expected + assert start.value == per.ordinal * 1000 + def test_asfreq_mult(self): # normal freq to mult freq p = Period(freq="A", year=2007) From 430fdcf8c81478093611ce8f71b88d864789109a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 30 Jan 2020 17:01:06 -0800 Subject: [PATCH 2/5] catch overflows --- pandas/_libs/tslibs/period.pyx | 9 ++++----- pandas/tests/scalar/period/test_asfreq.py | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c7f6bc40f55c1..7e2391a0a9e8c 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -22,7 +22,7 @@ PyDateTime_IMPORT from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct, pandas_datetime_to_datetimestruct, check_dts_bounds, - NPY_DATETIMEUNIT, NPY_FR_D) + NPY_DATETIMEUNIT, NPY_FR_D, NPY_FR_us) cdef extern from "src/datetime/np_datetime.h": int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, @@ -1182,17 +1182,16 @@ cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: cdef: npy_datetimestruct dts - int64_t value if ordinal == NPY_NAT: return NPY_NAT if freq == 11000: # Microsecond, avoid get_date_info to prevent floating point errors - value = ordinal * 1000 - dt64_to_dtstruct(value, &dts) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts) check_dts_bounds(&dts) - return value + # Equivalent: return ordinal * 1000 + return dtstruct_to_dt64(&dts) get_date_info(ordinal, freq, &dts) check_dts_bounds(&dts) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 0606b93db7eeb..0da2853ee7d2c 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -667,6 +667,12 @@ def test_conv_microsecond(self): assert start == expected assert start.value == per.ordinal * 1000 + per2 = Period("2300-01-01", "us") + with pytest.raises(OutOfBoundsDatetime, match="2300-01-01"): + per2.start_time + with pytest.raises(OutOfBoundsDatetime, match="2300-01-01"): + per2.end_time + def test_asfreq_mult(self): # normal freq to mult freq p = Period(freq="A", year=2007) From 949054545c595b0f2ffb9782ad2fec362db8d6c3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 08:13:28 -0800 Subject: [PATCH 3/5] GH ref, whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/scalar/period/test_asfreq.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e07a8fa0469f4..7db306d7ff82b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -105,6 +105,7 @@ Datetimelike - Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) +- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) Timedelta ^^^^^^^^^ diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 0da2853ee7d2c..436810042186a 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -657,8 +657,8 @@ def test_conv_secondly(self): assert ival_S.asfreq("S") == ival_S def test_conv_microsecond(self): - # Avoid floating point errors dropping the start_time to before - # the beginning of the Period + # GH#31475 Avoid floating point errors dropping the start_time to + # before the beginning of the Period per = Period("2020-01-30 15:57:27.576166", freq="U") assert per.ordinal == 1580399847576166 From 444b432a5cd31cfc08e48b565fe2fd8fd087c0e7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 2 Feb 2020 15:05:07 -0800 Subject: [PATCH 4/5] dedup --- pandas/_libs/tslibs/period.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 85854b863c43a..9419f0eba39aa 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1172,11 +1172,9 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: if freq == 11000: # Microsecond, avoid get_date_info to prevent floating point errors pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts) - check_dts_bounds(&dts) - # Equivalent: return ordinal * 1000 - return dtstruct_to_dt64(&dts) + else: + get_date_info(ordinal, freq, &dts) - get_date_info(ordinal, freq, &dts) check_dts_bounds(&dts) return dtstruct_to_dt64(&dts) From c25617711eb7e00338b5741bcd9df509d4513aea Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 2 Feb 2020 15:22:40 -0800 Subject: [PATCH 5/5] fix missing import --- pandas/core/series.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bfe9969daaa8e..040fcf392733b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -23,6 +23,7 @@ from pandas._config import get_option from pandas._libs import lib, properties, reshape, tslibs +from pandas._libs.index import validate_numeric_casting from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution @@ -1022,7 +1023,7 @@ def __setitem__(self, key, value): def _set_with_engine(self, key, value): # fails with AttributeError for IntervalIndex loc = self.index._engine.get_loc(key) - libindex.validate_numeric_casting(self.dtype, value) + validate_numeric_casting(self.dtype, value) self._values[loc] = value def _set_with(self, key, value): @@ -1105,7 +1106,7 @@ def _set_value(self, label, value, takeable: bool = False): self._values[label] = value else: loc = self.index.get_loc(label) - libindex.validate_numeric_casting(self.dtype, value) + validate_numeric_casting(self.dtype, value) self._values[loc] = value except KeyError: