From 261719c47312b77ad6e0e0acb1afe15276beda7b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 1 Oct 2022 16:37:25 -0700 Subject: [PATCH 01/22] API: Timedelta(td64_obj) retain resolution --- pandas/_libs/tslibs/timedeltas.pyx | 1 + pandas/core/construction.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index dd19306bd49c6..27b4724281bd4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1926,6 +1926,7 @@ class Timedelta(_Timedelta): def __floordiv__(self, other): # numpy does not implement floordiv for timedelta64 dtype, so we cannot # just defer + orig = other if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") other = Timedelta(other) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 989ce0335a476..f030100928afb 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -324,6 +324,13 @@ def array( data = extract_array(data, extract_numpy=True) + if isinstance(data, ExtensionArray) and ( + dtype is None or is_dtype_equal(dtype, data.dtype) + ): + if copy: + return data.copy() + return data + # this returns None for not-found dtypes. if isinstance(dtype, str): dtype = registry.find(dtype) or dtype From a8c69064bc26f44c010ee2f33ded76356c1e9573 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 3 Oct 2022 09:11:42 -0700 Subject: [PATCH 02/22] BUG: preserve DTA/TDA+timedeltalike scalar with mismatched resos --- pandas/tests/arrays/test_datetimes.py | 35 ++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 8c2a8df7c2fbd..fdb8278f718d0 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -1,12 +1,16 @@ """ Tests for DatetimeArray """ +from datetime import timedelta import operator import numpy as np import pytest -from pandas._libs.tslibs import tz_compare +from pandas._libs.tslibs import ( + npy_unit_to_abbrev, + tz_compare, +) from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -247,6 +251,35 @@ def test_sub_datetime64_reso_mismatch(self): result2 = right - left tm.assert_extension_array_equal(result2, expected) + @pytest.mark.parametrize( + "scalar", + [ + timedelta(hours=2), + pd.Timedelta(hours=2), + np.timedelta64(2, "h"), + np.timedelta64(2 * 3600 * 1000, "ms"), + pd.offsets.Minute(120), + pd.offsets.Hour(2), + ], + ) + def test_add_timedeltalike_scalar_mismatched_reso(self, dta_dti, scalar): + dta, dti = dta_dti + + td = pd.Timedelta(scalar) + exp_reso = max(dta._reso, td._reso) + exp_unit = npy_unit_to_abbrev(exp_reso) + + expected = (dti + td)._data._as_unit(exp_unit) + result = dta + scalar + tm.assert_extension_array_equal(result, expected) + + result = scalar + dta + tm.assert_extension_array_equal(result, expected) + + expected = (dti - td)._data._as_unit(exp_unit) + result = dta - scalar + tm.assert_extension_array_equal(result, expected) + class TestDatetimeArrayComparisons: # TODO: merge this into tests/arithmetic/test_datetime64 once it is From 6f5d4b5c09fa6f46dc3c325b059fe2583ebc48d6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Sep 2022 11:20:20 -0700 Subject: [PATCH 03/22] BUG: DatetimeArray-datetimelike mixed resos --- pandas/tests/arrays/test_datetimes.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git 
a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index fdb8278f718d0..05f674e43be76 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -280,6 +280,32 @@ def test_add_timedeltalike_scalar_mismatched_reso(self, dta_dti, scalar): result = dta - scalar tm.assert_extension_array_equal(result, expected) + def test_sub_datetimelike_scalar_mismatch(self): + dti = pd.date_range("2016-01-01", periods=3) + dta = dti._data._as_unit("us") + + ts = dta[0]._as_unit("s") + + result = dta - ts + expected = (dti - dti[0])._data._as_unit("us") + assert result.dtype == "m8[us]" + tm.assert_extension_array_equal(result, expected) + + def test_sub_datetime64_reso_mismatch(self): + dti = pd.date_range("2016-01-01", periods=3) + left = dti._data._as_unit("s") + right = left._as_unit("ms") + + result = left - right + exp_values = np.array([0, 0, 0], dtype="m8[ms]") + expected = TimedeltaArray._simple_new( + exp_values, + dtype=exp_values.dtype, + ) + tm.assert_extension_array_equal(result, expected) + result2 = right - left + tm.assert_extension_array_equal(result2, expected) + class TestDatetimeArrayComparisons: # TODO: merge this into tests/arithmetic/test_datetime64 once it is From ad51d10ff845b3ace0b99f05c495ce29d7955d55 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Oct 2022 13:12:28 -0700 Subject: [PATCH 04/22] API: Timestamp(pydatetime) microsecond reso --- pandas/_libs/lib.pyx | 1 + pandas/_libs/src/ujson/python/objToJSON.c | 2 + pandas/_libs/tslib.pyx | 6 +-- pandas/_libs/tslibs/__init__.py | 2 + pandas/_libs/tslibs/conversion.pxd | 1 + pandas/_libs/tslibs/conversion.pyx | 17 ++++++- pandas/_libs/tslibs/offsets.pyx | 8 ++-- pandas/_libs/tslibs/timedeltas.pyx | 1 - pandas/_libs/tslibs/timestamps.pyx | 45 ++++++------------- pandas/core/array_algos/take.py | 6 +++ pandas/core/arrays/datetimes.py | 19 +++++--- pandas/core/computation/pytables.py | 2 +- pandas/core/dtypes/cast.py | 5 ++- pandas/io/stata.py | 3 +- pandas/tests/frame/methods/test_replace.py | 2 +- pandas/tests/frame/test_constructors.py | 24 +++++++--- .../indexes/datetimes/test_constructors.py | 4 +- .../tests/indexes/datetimes/test_timezones.py | 2 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/scalar/period/test_period.py | 4 +- .../tests/scalar/timedelta/test_arithmetic.py | 2 +- .../tests/scalar/timestamp/test_arithmetic.py | 12 +++-- .../scalar/timestamp/test_constructors.py | 6 +++ .../tests/scalar/timestamp/test_timestamp.py | 17 +++---- pandas/tests/tools/test_to_datetime.py | 12 +++-- pandas/tests/tslibs/test_api.py | 1 + 26 files changed, 129 insertions(+), 77 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index cc891bcb8a572..80d88ab85401d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2585,6 +2585,7 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.datetime_ = True try: + # FIXME: will have different reso depending on type idatetimes[i] = convert_to_tsobject( val, None, None, 0, 0).value except OutOfBoundsDatetime: diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 260f1ffb6165f..d6ba75a0bc749 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1305,6 +1305,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, } else if (PyDate_Check(item) || PyDelta_Check(item)) { is_datetimelike = 1; if (PyObject_HasAttrString(item, "value")) { + // FIXME: need to ensure 
nanos with ._as_unit("ns") or ._as_reso(NPY_FR_ns) + // see test_date_index_and_values nanosecVal = get_long_attr(item, "value"); } else { if (PyDelta_Check(item)) { diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3526ea3438aff..f208848a9b77d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -535,7 +535,7 @@ cpdef array_to_datetime( raise ValueError('Cannot mix tz-aware with ' 'tz-naive values') if isinstance(val, _Timestamp): - iresult[i] = val.value + iresult[i] = (<_Timestamp>val)._as_reso(NPY_FR_ns).value else: iresult[i] = pydatetime_to_dt64(val, &dts) check_dts_bounds(&dts) @@ -837,7 +837,7 @@ cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc): # We delay this check for as long as possible # because it catches relatively rare cases if val == "now": - iresult[0] = Timestamp.utcnow().value + iresult[0] = Timestamp.utcnow().value * 1000 # *1000 to convert to nanos if not utc: # GH#18705 make sure to_datetime("now") matches Timestamp("now") warnings.warn( @@ -850,6 +850,6 @@ cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc): return True elif val == "today": - iresult[0] = Timestamp.today().value + iresult[0] = Timestamp.today().value * 1000 # *1000 to convert to nanos return True return False diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 47143b32d6dbe..d472600c87c01 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -31,6 +31,7 @@ "periods_per_day", "periods_per_second", "is_supported_unit", + "npy_unit_to_abbrev", ] from pandas._libs.tslibs import dtypes @@ -38,6 +39,7 @@ from pandas._libs.tslibs.dtypes import ( Resolution, is_supported_unit, + npy_unit_to_abbrev, periods_per_day, periods_per_second, ) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index a90347415ec76..66b5c82a8b531 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -20,6 +20,7 @@ cdef class _TSObject: int64_t value # numpy dt64 tzinfo tzinfo bint fold + NPY_DATETIMEUNIT reso cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 026bf44300407..8da0b059d7f3f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -36,6 +36,7 @@ from pandas._libs.tslibs.dtypes cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, + NPY_FR_us, check_dts_bounds, get_datetime64_unit, get_datetime64_value, @@ -204,10 +205,12 @@ cdef class _TSObject: # int64_t value # numpy dt64 # tzinfo tzinfo # bint fold + # NPY_DATETIMEUNIT reso def __cinit__(self): # GH 25057. As per PEP 495, set fold to 0 by default self.fold = 0 + self.reso = NPY_FR_ns # default cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, @@ -228,6 +231,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, """ cdef: _TSObject obj + NPY_DATETIMEUNIT reso obj = _TSObject() @@ -282,11 +286,19 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, obj.value = ts pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts) elif PyDateTime_Check(ts): - return convert_datetime_to_tsobject(ts, tz, nanos) + if nanos == 0: + if isinstance(ts, ABCTimestamp): + reso = abbrev_to_npy_unit(ts._unit) # TODO: faster way to do this? + else: + # TODO: what if user explicitly passes nanos=0? 
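+ # e.g. with this default a plain stdlib datetime gets microsecond resolution: + # Timestamp(datetime(2020, 1, 1)).asm8.dtype == "M8[us]" + # (exercised below in test_from_out_of_bounds_ns_datetime)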
+ reso = NPY_FR_us + else: + reso = NPY_FR_ns + return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, time()) - return convert_datetime_to_tsobject(ts, tz) + return convert_datetime_to_tsobject(ts, tz, nanos=0, reso=NPY_FR_us) # TODO: or lower? else: from .period import Period if isinstance(ts, Period): @@ -340,6 +352,7 @@ cdef _TSObject convert_datetime_to_tsobject( _TSObject obj = _TSObject() int64_t pps + obj.reso = reso obj.fold = ts.fold if tz is not None: tz = maybe_get_tz(tz) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 7be7381bcb4d1..d9bb7f913dc6a 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -166,7 +166,8 @@ def apply_wraps(func): result = func(self, other) - result = Timestamp(result) + result = Timestamp(result)._as_unit(other._unit) # TODO: _as_reso? + if self._adjust_dst: result = result.tz_localize(tz) @@ -179,9 +180,10 @@ def apply_wraps(func): if result.nanosecond != nano: if result.tz is not None: # convert to UTC - value = result.tz_localize(None).value + res = result.tz_localize(None) else: - value = result.value + res = result + value = res._as_unit("ns").value result = Timestamp(value + nano) if tz is not None and result.tzinfo is None: diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 27b4724281bd4..dd19306bd49c6 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1926,7 +1926,6 @@ class Timedelta(_Timedelta): def __floordiv__(self, other): # numpy does not implement floordiv for timedelta64 dtype, so we cannot # just defer - orig = other if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") other = Timedelta(other) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ec7379e080d9..ad32729493912 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -65,7 +65,6 @@ from pandas._libs.tslibs.util cimport ( is_array, is_datetime64_object, is_integer_object, - is_timedelta64_object, ) from pandas._libs.tslibs.fields import ( @@ -107,7 +106,6 @@ from pandas._libs.tslibs.offsets cimport ( from pandas._libs.tslibs.timedeltas cimport ( _Timedelta, delta_to_nanoseconds, - ensure_td64ns, is_any_td_scalar, ) @@ -432,32 +430,7 @@ cdef class _Timestamp(ABCTimestamp): int64_t nanos = 0 if is_any_td_scalar(other): - if is_timedelta64_object(other): - other_reso = get_datetime64_unit(other) - if ( - other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC - ): - # TODO: deprecate allowing this? We only get here - # with test_timedelta_add_timestamp_interval - other = np.timedelta64(other.view("i8"), "ns") - other_reso = NPY_DATETIMEUNIT.NPY_FR_ns - elif ( - other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M - ): - # TODO: deprecate allowing these? or handle more like the - # corresponding DateOffsets? 
- # TODO: no tests get here - other = ensure_td64ns(other) - other_reso = NPY_DATETIMEUNIT.NPY_FR_ns - - if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns: - # TODO: no tests - other = ensure_td64ns(other) - if other_reso > self._reso: - # Following numpy, we cast to the higher resolution - # test_sub_timedelta64_mismatched_reso - self = (<_Timestamp>self)._as_reso(other_reso) - + other = Timedelta(other) if isinstance(other, _Timedelta): # TODO: share this with __sub__, Timedelta.__add__ @@ -563,9 +536,9 @@ cdef class _Timestamp(ABCTimestamp): # Matching numpy, we cast to the higher resolution. Unlike numpy, # we raise instead of silently overflowing during this casting. if self._reso < other._reso: - self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True) elif self._reso > other._reso: - other = (<_Timestamp>other)._as_reso(self._reso, round_ok=False) + other = (<_Timestamp>other)._as_reso(self._reso, round_ok=True) # scalar Timestamp/datetime - Timestamp/datetime -> yields a # Timedelta @@ -1107,7 +1080,13 @@ cdef class _Timestamp(ABCTimestamp): if reso == self._reso: return self - value = convert_reso(self.value, self._reso, reso, round_ok=round_ok) + try: + value = convert_reso(self.value, self._reso, reso, round_ok=round_ok) + except OverflowError as err: + unit = npy_unit_to_abbrev(reso) + raise OutOfBoundsDatetime( + f"Cannot cast {self} to unit='{unit}' without overflow." + ) from err return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo) def _as_unit(self, str unit, bint round_ok=True): @@ -1713,7 +1692,9 @@ class Timestamp(_Timestamp): if not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) + return create_timestamp_from_ts( + ts.value, ts.dts, ts.tzinfo, freq, ts.fold, reso=ts.reso + ) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): cdef: diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 19c19c66a7256..ed93313711150 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -360,7 +360,13 @@ def wrapper( if out_dtype is not None: out = out.view(out_dtype) if fill_wrap is not None: + # FIXME: if we get here with dt64/td64 we need to be sure we have matching resos + if fill_value.dtype.kind == "m": + fill_value = fill_value.astype("m8[ns]") + else: + fill_value = fill_value.astype("M8[ns]") fill_value = fill_wrap(fill_value) + f(arr, indexer, out, fill_value=fill_value) return wrapper diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a9c1a7e3cdab0..de0bdd5293739 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -375,6 +375,11 @@ def _generate_range( # type: ignore[override] if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") + if start is not None: + start = start._as_unit("ns") + if end is not None: + end = end._as_unit("ns") + left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz = _infer_tz_from_endpoints(start, end, tz) @@ -461,7 +466,11 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") self._check_compatible_with(value, setitem=setitem) - return value.asm8 + + if value is NaT: + return 
np.datetime64(value.value, self._unit) + else: + return value._as_unit(self._unit).asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: return Timestamp(value, tz=self.tz) @@ -2574,13 +2583,13 @@ def _generate_range( start = Timestamp(start) # type: ignore[arg-type] # Non-overlapping identity check (left operand type: "Timestamp", right # operand type: "NaTType") - start = start if start is not NaT else None # type: ignore[comparison-overlap] + start = start._as_unit("ns") if start is not NaT else None # type: ignore[comparison-overlap] # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]"; # expected "Union[integer[Any], float, str, date, datetime64]" end = Timestamp(end) # type: ignore[arg-type] # Non-overlapping identity check (left operand type: "Timestamp", right # operand type: "NaTType") - end = end if end is not NaT else None # type: ignore[comparison-overlap] + end = end._as_unit("ns") if end is not NaT else None # type: ignore[comparison-overlap] if start and not offset.is_on_offset(start): # Incompatible types in assignment (expression has type "datetime", @@ -2621,7 +2630,7 @@ def _generate_range( break # faster than cur + offset - next_date = offset._apply(cur) + next_date = offset._apply(cur)._as_unit("ns") if next_date <= cur: raise ValueError(f"Offset {offset} did not increment date") cur = next_date @@ -2635,7 +2644,7 @@ def _generate_range( break # faster than cur + offset - next_date = offset._apply(cur) + next_date = offset._apply(cur)._as_unit("ns") if next_date >= cur: raise ValueError(f"Offset {offset} did not decrement date") cur = next_date diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index f0f3e7f19db50..ec769e88339f7 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -215,7 +215,7 @@ def stringify(value): if isinstance(v, (int, float)): v = stringify(v) v = ensure_decoded(v) - v = Timestamp(v) + v = Timestamp(v)._as_unit("ns") if v.tz is not None: v = v.tz_convert("UTC") return TermValue(v, v.value, kind) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6b890f98e8cac..cee4139ddfb62 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -792,16 +792,19 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, elif isinstance(val, (np.datetime64, datetime)): try: val = Timestamp(val) + if val is not NaT: + val = val._as_unit("ns") except OutOfBoundsDatetime: return _dtype_obj, val # error: Non-overlapping identity check (left operand type: "Timestamp", # right operand type: "NaTType") if val is NaT or val.tz is None: # type: ignore[comparison-overlap] - dtype = np.dtype("M8[ns]") val = val.to_datetime64() + dtype = val.dtype # TODO(2.0): this should be dtype = val.dtype # to get the correct M8 resolution + # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes else: if pandas_dtype: dtype = DatetimeTZDtype(unit="ns", tz=val.tz) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 6baf5f0da8612..4380b54b5936d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -420,7 +420,8 @@ def parse_dates_safe( d = {} if is_datetime64_dtype(dates.dtype): if delta: - time_delta = dates - stata_epoch + # TODO(2.0): once arithmetic is fixed the as_unit should be unnecessary + time_delta = dates - Timestamp(stata_epoch)._as_unit("ns") d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) diff --git 
a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 177f3ec1b4504..0bff67d34999b 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -839,7 +839,7 @@ def test_replace_for_new_dtypes(self, datetime_frame): ], ) def test_replace_dtypes(self, frame, to_replace, value, expected): - result = getattr(frame, "replace")(to_replace, value) + result = frame.replace(to_replace, value) tm.assert_frame_equal(result, expected) def test_replace_input_formats_listlike(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 02313e429f3b6..2eafadba7cd67 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3113,17 +3113,28 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls): with pytest.raises(TypeError, match=msg): constructor(scalar, dtype=dtype) + @pytest.mark.xfail( + reason="Timestamp constructor has been updated to cast dt64 to non-nano, " + "but DatetimeArray._from_sequence has not" + ) @pytest.mark.parametrize("cls", [datetime, np.datetime64]) - def test_from_out_of_bounds_datetime(self, constructor, cls): + def test_from_out_of_bounds_ns_datetime(self, constructor, cls): scalar = datetime(9999, 1, 1) + exp_dtype = "M8[us]" # pydatetime objects default to this reso if cls is np.datetime64: scalar = np.datetime64(scalar, "D") + exp_dtype = "M8[s]" # closest reso to input result = constructor(scalar) - assert type(get1(result)) is cls + item = get1(result) + dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + + assert type(item) is Timestamp + assert item.asm8.dtype == exp_dtype + assert dtype == exp_dtype @pytest.mark.xfail( - reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, " + reason="Timedelta constructor has been updated to cast td64 to non-nano, " "but TimedeltaArray._from_sequence has not" ) @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) @@ -3143,11 +3154,12 @@ def test_from_out_of_bounds_ns_timedelta(self, constructor, cls): assert item.asm8.dtype == exp_dtype assert dtype == exp_dtype - def test_out_of_s_bounds_timedelta64(self, constructor): - scalar = np.timedelta64(np.iinfo(np.int64).max, "D") + @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) + def test_out_of_s_bounds_timedelta64(self, constructor, cls): + scalar = cls(np.iinfo(np.int64).max, "D") result = constructor(scalar) item = get1(result) - assert type(item) is np.timedelta64 + assert type(item) is cls dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] assert dtype == object diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index d129f5b365ca4..cfa869cbbb8c9 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1185,8 +1185,8 @@ def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): @pytest.mark.parametrize( "ts_input,fold,value_out", [ - (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), - (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000), ], ) def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py 
b/pandas/tests/indexes/datetimes/test_timezones.py index a07f21f785828..fcd91fcc6091f 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1147,7 +1147,7 @@ def test_dti_convert_tz_aware_datetime_datetime(self, tz): assert timezones.tz_compare(result.tz, tz) converted = to_datetime(dates_aware, utc=True) - ex_vals = np.array([Timestamp(x).value for x in dates_aware]) + ex_vals = np.array([Timestamp(x)._as_unit("ns").value for x in dates_aware]) tm.assert_numpy_array_equal(converted.asi8, ex_vals) assert converted.tz is pytz.utc diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index ae13d8d5fb180..2fffb69a1e2a3 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -420,7 +420,7 @@ def test_encode_as_null(self, decoded_input): def test_datetime_units(self): val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) - stamp = Timestamp(val) + stamp = Timestamp(val)._as_unit("ns") roundtrip = ujson.decode(ujson.encode(val, date_unit="s")) assert roundtrip == stamp.value // 10**9 diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index b5bd61854866d..16b4e6377188c 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -883,7 +883,7 @@ def test_end_time(self): p = Period("2012", freq="A") def _ex(*args): - return Timestamp(Timestamp(datetime(*args)).value - 1) + return Timestamp(Timestamp(datetime(*args))._as_unit("ns").value - 1) xp = _ex(2013, 1, 1) assert xp == p.end_time @@ -935,7 +935,7 @@ def test_end_time_business_friday(self): def test_anchor_week_end_time(self): def _ex(*args): - return Timestamp(Timestamp(datetime(*args)).value - 1) + return Timestamp(Timestamp(datetime(*args))._as_unit("ns").value - 1) p = Period("2013-1-1", "W-SAT") xp = _ex(2013, 1, 6) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index f3b84388b0f70..2fb69eae8dcb7 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -103,7 +103,7 @@ def test_td_add_timestamp_overflow(self): with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow" + msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + timedelta(days=13 * 19999) diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 65610bbe14e41..8f0de91d16d86 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -45,7 +45,7 @@ def test_overflow_offset_raises(self): r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " "will overflow" ) - lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow" + lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): stamp + offset_overflow @@ -62,7 +62,9 @@ def test_overflow_offset_raises(self): stamp = Timestamp("2000/1/1") offset_overflow = to_offset("D") * 100**5 - lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow" + lmsg3 = ( + r"Cannot cast -?10000000000 days \+?00:00:00 to unit='ns' without overflow" + ) 
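+ # NB: the pattern now matches pd.Timedelta's repr ("10000000000 days 00:00:00") + # rather than the offset repr ("<10000000000 * Days>"), because __add__ now + # does other = Timedelta(other) for timedelta-like scalars.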
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): stamp + offset_overflow @@ -227,11 +229,15 @@ def test_radd_tdscalar(self, td, fixed_now_ts): ], ) def test_timestamp_add_timedelta64_unit(self, other, expected_difference): - ts = Timestamp(datetime.utcnow()) + now = datetime.utcnow() + ts = Timestamp(now)._as_unit("ns") result = ts + other valdiff = result.value - ts.value assert valdiff == expected_difference + ts2 = Timestamp(now) + assert ts2 + other == result + @pytest.mark.parametrize( "ts", [ diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 58150fdce8503..558a436905d1c 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -105,6 +105,9 @@ def test_constructor(self): for date_str, date, expected in tests: for result in [Timestamp(date_str), Timestamp(date)]: + result = result._as_unit( + "ns" + ) # test originally written before non-nano # only with timestring assert result.value == expected @@ -115,6 +118,9 @@ def test_constructor(self): # with timezone for tz, offset in timezones: for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: + result = result._as_unit( + "ns" + ) # test originally written before non-nano expected_tz = expected - offset * 3600 * 1_000_000_000 assert result.value == expected_tz diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index b6bc3a866fc8e..c13e533555570 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -644,7 +644,10 @@ def test_to_datetime_bijective(self): with tm.assert_produces_warning(exp_warning): pydt_max = Timestamp.max.to_pydatetime() - assert Timestamp(pydt_max).value / 1000 == Timestamp.max.value / 1000 + assert ( + Timestamp(pydt_max)._as_unit("ns").value / 1000 + == Timestamp.max.value / 1000 + ) exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning with tm.assert_produces_warning(exp_warning): @@ -655,7 +658,10 @@ def test_to_datetime_bijective(self): tdus = timedelta(microseconds=1) assert pydt_min + tdus > Timestamp.min - assert Timestamp(pydt_min + tdus).value / 1000 == Timestamp.min.value / 1000 + assert ( + Timestamp(pydt_min + tdus)._as_unit("ns").value / 1000 + == Timestamp.min.value / 1000 + ) def test_to_period_tz_warning(self): # GH#21333 make sure a warning is issued when timezone @@ -884,12 +890,7 @@ def test_to_period(self, dt64, ts): ) def test_addsub_timedeltalike_non_nano(self, dt64, ts, td): - if isinstance(td, Timedelta): - # td._reso is ns - exp_reso = td._reso - else: - # effective td._reso is s - exp_reso = ts._reso + exp_reso = max(ts._reso, Timedelta(td)._reso) result = ts - td expected = Timestamp(dt64) - td diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 3d59e115d4cf9..6aa02ec657108 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -658,7 +658,7 @@ def test_to_datetime_today(self, tz): pdtoday2 = to_datetime(["today"])[0] tstoday = Timestamp("today") - tstoday2 = Timestamp.today() + tstoday2 = Timestamp.today()._as_unit("ns") # These should all be equal with infinite perf; this gives # a generous margin of 10 seconds @@ -1878,7 +1878,7 @@ def test_to_datetime_list_of_integers(self): def test_to_datetime_overflow(self): # gh-17637 # we are overflowing Timedelta range here - msg = "Cannot cast 139999 days, 0:00:00 
to unit=ns without overflow" + msg = "Cannot cast 139999 days 00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) @@ -2607,7 +2607,13 @@ def test_epoch(self, units, epochs, epoch_1960, units_from_epochs): ) def test_invalid_origins(self, origin, exc, units, units_from_epochs): - msg = f"origin {origin} (is Out of Bounds|cannot be converted to a Timestamp)" + msg = "|".join( + [ + f"origin {origin} is Out of Bounds", + f"origin {origin} cannot be converted to a Timestamp", + "Cannot cast .* to unit='ns' without overflow", + ] + ) with pytest.raises(exc, match=msg): to_datetime(units_from_epochs, unit=units, origin=origin) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 2d195fad83644..45511f4a19461 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -56,6 +56,7 @@ def test_namespace(): "periods_per_day", "periods_per_second", "is_supported_unit", + "npy_unit_to_abbrev", ] expected = set(submodules + api) From 43436cee0324921fc09131a70be299274599cadc Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 11 Oct 2022 16:40:27 -0700 Subject: [PATCH 05/22] use willayd suggestion --- pandas/_libs/src/ujson/python/objToJSON.c | 32 +++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index d6ba75a0bc749..b3143e3a24016 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -318,11 +318,40 @@ static int is_simple_frame(PyObject *obj) { } static npy_int64 get_long_attr(PyObject *o, const char *attr) { + // NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT + npy_int64 long_val; PyObject *value = PyObject_GetAttrString(o, attr); long_val = (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value)); + Py_DECREF(value); + + if (long_val == NPY_MIN_INT64) { + // i.e. 
o is NaT + return long_val; + } + + // ensure we are in nanoseconds, similar to Timestamp._as_reso or _as_unit + PyObject* reso = PyObject_GetAttrString(o, "_reso"); + // if (!PyLong_Check(reso)) { + // TODO(anyone): we should have error handling here, but one step at a time + // } + + long cReso = PyLong_AsLong(reso); + // if (cReso == -1 && PyErr_Occurred()) { + // TODO(anyone): we should have error handling here, but one step at a time + // } + Py_DECREF(reso); + + if (cReso == NPY_FR_us) { + long_val = long_val * 1000L; + } else if (cReso == NPY_FR_ms) { + long_val = long_val * 1000000L; + } else if (cReso == NPY_FR_s) { + long_val = long_val * 1000000000L; + } + return long_val; } @@ -1305,8 +1334,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, } else if (PyDate_Check(item) || PyDelta_Check(item)) { is_datetimelike = 1; if (PyObject_HasAttrString(item, "value")) { - // FIXME: need to ensure nanos with ._as_unit("ns") or ._as_reso(NPY_FR_ns) - // see test_date_index_and_values + // see test_date_index_and_values for case with non-nano nanosecVal = get_long_attr(item, "value"); } else { if (PyDelta_Check(item)) { From 8802add29c772fb739cbb3914b7686786805d424 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Oct 2022 14:29:33 -0700 Subject: [PATCH 06/22] ci fixup --- pandas/core/array_algos/take.py | 3 ++- pandas/core/dtypes/cast.py | 4 +++- pandas/io/stata.py | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index ed93313711150..772a7e7a20b7b 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -360,7 +360,8 @@ def wrapper( if out_dtype is not None: out = out.view(out_dtype) if fill_wrap is not None: - # FIXME: if we get here with dt64/td64 we need to be sure we have matching resos + # FIXME: if we get here with dt64/td64 we need to be sure we have + # matching resos if fill_value.dtype.kind == "m": fill_value = fill_value.astype("m8[ns]") else: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index cee4139ddfb62..ff8f3f78aed1d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -792,7 +792,9 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, elif isinstance(val, (np.datetime64, datetime)): try: val = Timestamp(val) - if val is not NaT: + # error: Non-overlapping identity check (left operand type: + # "Timestamp", right operand type: "NaTType") + if val is not NaT: # type: ignore[comparison-overlap] val = val._as_unit("ns") except OutOfBoundsDatetime: return _dtype_obj, val diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 4380b54b5936d..fd95a43f5e338 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -421,7 +421,9 @@ def parse_dates_safe( if is_datetime64_dtype(dates.dtype): if delta: # TODO(2.0): once arithmetic is fixed the as_unit should be unnecessary - time_delta = dates - Timestamp(stata_epoch)._as_unit("ns") + ts = Timestamp(stata_epoch) + ts = cast(Timestamp, ts) + time_delta = dates - ts._as_unit("ns") d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) From 4c6f0f6ff7e7575fdcb36891aa559b0674f3adcc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Oct 2022 15:12:16 -0700 Subject: [PATCH 07/22] mypy fixup --- pandas/core/computation/pytables.py | 5 ++++- pandas/io/stata.py | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git 
a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index ec769e88339f7..b4f097706f6e3 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -11,6 +11,7 @@ import numpy as np from pandas._libs.tslibs import ( + NaT, Timedelta, Timestamp, ) @@ -215,7 +216,9 @@ def stringify(value): if isinstance(v, (int, float)): v = stringify(v) v = ensure_decoded(v) - v = Timestamp(v)._as_unit("ns") + v = Timestamp(v) + if v is not NaT: + v = v._as_unit("ns") if v.tz is not None: v = v.tz_convert("UTC") return TermValue(v, v.value, kind) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index fd95a43f5e338..4380b54b5936d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -421,9 +421,7 @@ def parse_dates_safe( if is_datetime64_dtype(dates.dtype): if delta: # TODO(2.0): once arithmetic is fixed the as_unit should be unnecessary - ts = Timestamp(stata_epoch) - ts = cast(Timestamp, ts) - time_delta = dates - ts._as_unit("ns") + time_delta = dates - Timestamp(stata_epoch)._as_unit("ns") d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) From 5c187380c516f53f49083ee5f001c0bef7cd7a12 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Oct 2022 15:47:52 -0700 Subject: [PATCH 08/22] ignore pyright --- pandas/core/computation/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index b4f097706f6e3..68874920a02aa 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -218,7 +218,7 @@ def stringify(value): v = ensure_decoded(v) v = Timestamp(v) if v is not NaT: - v = v._as_unit("ns") + v = v._as_unit("ns") # pyright: ignore[reportGeneralTypeIssues] if v.tz is not None: v = v.tz_convert("UTC") return TermValue(v, v.value, kind) From aeadbdc3f0d8bd36683b037dd758fc801aac28c8 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Oct 2022 16:45:40 -0700 Subject: [PATCH 09/22] fix doctest --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9c78ecddb5caa..a26c4e000cb3e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1098,7 +1098,7 @@ cdef class _Timestamp(ABCTimestamp): -------- >>> ts = pd.Timestamp(2020, 3, 14, 15) >>> ts.asm8 - numpy.datetime64('2020-03-14T15:00:00.000000000') + numpy.datetime64('2020-03-14T15:00:00.000000') """ return self.to_datetime64() From 382c46e757e55e624fe8360c2ab83044cc7b9afa Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Oct 2022 20:14:56 -0700 Subject: [PATCH 10/22] un-xfail --- pandas/tests/series/methods/test_isin.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 0eca3e0512849..449724508fcaa 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -57,10 +57,6 @@ def test_isin_datetimelike_mismatched_reso(self): result = ser.isin(dta) tm.assert_series_equal(result, expected) - @pytest.mark.xfail( - reason="DTA._from_sequence incorrectly treats Timestamp[s].value as " - "nanoseconds." 
- ) def test_isin_datetimelike_mismatched_reso_list(self): expected = Series([True, True, False, False, False]) From bc6f014fd97a5024079f2d2629db405117dd0eef Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Oct 2022 11:37:20 -0700 Subject: [PATCH 11/22] Merge main follow-up --- pandas/_libs/lib.pyx | 13 +++++++++---- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 7 +++++-- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/core/construction.py | 7 ------- pandas/io/stata.py | 1 - 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f59808412266a..85ca13b949580 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -109,12 +109,16 @@ from pandas._libs.missing cimport ( is_null_datetime64, is_null_timedelta64, ) -from pandas._libs.tslibs.conversion cimport convert_to_tsobject +from pandas._libs.tslibs.conversion cimport ( + _TSObject, + convert_to_tsobject, +) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, checknull_with_nat, ) +from pandas._libs.tslibs.np_datetime cimport NPY_FR_ns from pandas._libs.tslibs.offsets cimport is_offset_object from pandas._libs.tslibs.period cimport is_period_object from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 @@ -2452,6 +2456,7 @@ def maybe_convert_objects(ndarray[object] objects, Seen seen = Seen() object val float64_t fval, fnan = np.nan + _TSObject tsobj n = len(objects) @@ -2545,9 +2550,9 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.datetime_ = True try: - # FIXME: will have different reso depending on type - idatetimes[i] = convert_to_tsobject( - val, None, None, 0, 0).value + tsobj = convert_to_tsobject(val, None, None, 0, 0) + tsobj.ensure_reso(NPY_FR_ns) + idatetimes[i] = tsobj.value except OutOfBoundsDatetime: seen.object_ = True break diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 5c73b908d7eff..3006c5f2fd246 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -22,7 +22,7 @@ cdef class _TSObject: bint fold NPY_DATETIMEUNIT reso - cdef void ensure_reso(self, NPY_DATETIMEUNIT reso) + cdef ensure_reso(self, NPY_DATETIMEUNIT reso) cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 3c3bbdcb8aad4..1aa7ef62f45d3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -213,9 +213,12 @@ cdef class _TSObject: self.fold = 0 self.reso = NPY_FR_ns # default value - cdef void ensure_reso(self, NPY_DATETIMEUNIT reso): + cdef ensure_reso(self, NPY_DATETIMEUNIT reso): if self.reso != reso: - self.value = convert_reso(self.value, self.reso, reso, False) + try: + self.value = convert_reso(self.value, self.reso, reso, False) + except OverflowError as err: + raise OutOfBoundsDatetime from err cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index c97a432209fe8..05abc5976c2f8 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -165,7 +165,7 @@ def apply_wraps(func): result = func(self, other) - result = Timestamp(result)._as_unit(other._unit) # TODO: _as_reso? 
+ result = (<_Timestamp>Timestamp(result))._as_reso(other._reso) if self._adjust_dst: result = result.tz_localize(tz) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index ecfdc85c9b19c..b7db95269439c 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -323,13 +323,6 @@ def array( data = extract_array(data, extract_numpy=True) - if isinstance(data, ExtensionArray) and ( - dtype is None or is_dtype_equal(dtype, data.dtype) - ): - if copy: - return data.copy() - return data - # this returns None for not-found dtypes. if isinstance(dtype, str): dtype = registry.find(dtype) or dtype diff --git a/pandas/io/stata.py b/pandas/io/stata.py index d9ba0ee5b0a07..057697f3aca65 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -419,7 +419,6 @@ def parse_dates_safe( d = {} if is_datetime64_dtype(dates.dtype): if delta: - # TODO(2.0): once arithmetic is fixed the as_unit should be unnecessary time_delta = dates - Timestamp(stata_epoch)._as_unit("ns") d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) From fe8c444d07ef00f60e1d9c1a8b3b4ae98732646b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Nov 2022 11:44:34 -0800 Subject: [PATCH 12/22] s reso for pydate --- pandas/_libs/tslibs/conversion.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c56d1f8cc854c..90f73b06ce9b9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -302,8 +302,9 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's + # For date objects we give the lowest supported resolution, i.e. "s" ts = datetime.combine(ts, time()) - return convert_datetime_to_tsobject(ts, tz, nanos=0, reso=NPY_FR_us) # TODO: or lower? + return convert_datetime_to_tsobject(ts, tz, nanos=0, reso=NPY_FR_s) else: from .period import Period if isinstance(ts, Period): From 7717c10a185903f1af2b64149e7da3169d753cfe Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Nov 2022 14:36:31 -0800 Subject: [PATCH 13/22] typo fixup --- pandas/_libs/tslibs/conversion.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 90f73b06ce9b9..e0317512ad43c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -304,7 +304,9 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, # Keep the converter same as PyDateTime's # For date objects we give the lowest supported resolution, i.e.
"s" ts = datetime.combine(ts, time()) - return convert_datetime_to_tsobject(ts, tz, nanos=0, reso=NPY_FR_s) + return convert_datetime_to_tsobject( + ts, tz, nanos=0, reso=NPY_DATETIMEUNIT.NPY_FR_s + ) else: from .period import Period if isinstance(ts, Period): From 06945fcf35878e94f9cb438c53d080053b07d500 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Nov 2022 08:21:17 -0800 Subject: [PATCH 14/22] post-merge fixups --- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/core/arrays/datetimes.py | 16 ++++++++-------- pandas/core/computation/pytables.py | 2 +- pandas/core/dtypes/cast.py | 2 +- pandas/io/stata.py | 2 +- pandas/tests/indexes/datetimes/test_timezones.py | 2 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/scalar/period/test_period.py | 4 ++-- pandas/tests/scalar/timestamp/test_arithmetic.py | 2 +- .../tests/scalar/timestamp/test_constructors.py | 6 ++---- pandas/tests/scalar/timestamp/test_timestamp.py | 5 ++--- pandas/tests/tools/test_to_datetime.py | 2 +- 13 files changed, 23 insertions(+), 26 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index e0317512ad43c..8c724ffb1ab69 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -293,7 +293,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, elif PyDateTime_Check(ts): if nanos == 0: if isinstance(ts, ABCTimestamp): - reso = abbrev_to_npy_unit(ts._unit) # TODO: faster way to do this? + reso = abbrev_to_npy_unit(ts.unit) # TODO: faster way to do this? else: # TODO: what if user explicitly passes nanos=0? reso = NPY_FR_us diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 4d63e796966ec..25bb341d223ec 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -179,7 +179,7 @@ def apply_wraps(func): res = result.tz_localize(None) else: res = result - value = res._as_unit("ns").value + value = res.as_unit("ns").value result = Timestamp(value + nano) if tz is not None and result.tzinfo is None: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 5fb95b3219721..5386e50c11c6b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -403,9 +403,9 @@ def _generate_range( # type: ignore[override] raise ValueError("Neither `start` nor `end` can be NaT") if start is not None: - start = start._as_unit("ns") + start = start.as_unit("ns") if end is not None: - end = end._as_unit("ns") + end = end.as_unit("ns") left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end = _maybe_normalize_endpoints(start, end, normalize) @@ -494,9 +494,9 @@ def _unbox_scalar(self, value) -> np.datetime64: raise ValueError("'value' should be a Timestamp.") self._check_compatible_with(value) if value is NaT: - return np.datetime64(value.value, self._unit) + return np.datetime64(value.value, self.unit) else: - return value._as_unit(self._unit).asm8 + return value.as_unit(self.unit).asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: return Timestamp(value, tz=self.tz) @@ -2511,7 +2511,7 @@ def _generate_range( # Non-overlapping identity check (left operand type: "Timestamp", right # operand type: "NaTType") if start is not NaT: # type: ignore[comparison-overlap] - start = start._as_unit("ns") + start = start.as_unit("ns") else: start = None @@ -2521,7 +2521,7 @@ def _generate_range( # Non-overlapping identity check (left operand type: "Timestamp", right # 
operand type: "NaTType") if end is not NaT: # type: ignore[comparison-overlap] - end = end._as_unit("ns") + end = end.as_unit("ns") else: end = None @@ -2564,7 +2564,7 @@ def _generate_range( break # faster than cur + offset - next_date = offset._apply(cur)._as_unit("ns") + next_date = offset._apply(cur).as_unit("ns") if next_date <= cur: raise ValueError(f"Offset {offset} did not increment date") cur = next_date @@ -2578,7 +2578,7 @@ def _generate_range( break # faster than cur + offset - next_date = offset._apply(cur)._as_unit("ns") + next_date = offset._apply(cur).as_unit("ns") if next_date >= cur: raise ValueError(f"Offset {offset} did not decrement date") cur = next_date diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 202c3a35aae64..4055be3f943fa 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -218,7 +218,7 @@ def stringify(value): v = ensure_decoded(v) v = Timestamp(v) if v is not NaT: - v = v._as_unit("ns") # pyright: ignore[reportGeneralTypeIssues] + v = v.as_unit("ns") # pyright: ignore[reportGeneralTypeIssues] if v.tz is not None: v = v.tz_convert("UTC") return TermValue(v, v.value, kind) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fc30f490f7695..b6bb22daec13e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -764,7 +764,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, # error: Non-overlapping identity check (left operand type: # "Timestamp", right operand type: "NaTType") if val is not NaT: # type: ignore[comparison-overlap] - val = val._as_unit("ns") + val = val.as_unit("ns") except OutOfBoundsDatetime: return _dtype_obj, val diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 324a3ed0f81c8..12541ca8bfdac 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -418,7 +418,7 @@ def parse_dates_safe( d = {} if is_datetime64_dtype(dates.dtype): if delta: - time_delta = dates - Timestamp(stata_epoch)._as_unit("ns") + time_delta = dates - Timestamp(stata_epoch).as_unit("ns") d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 86adbfc428e5b..589df4299bdaf 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1147,7 +1147,7 @@ def test_dti_convert_tz_aware_datetime_datetime(self, tz): assert timezones.tz_compare(result.tz, tz) converted = to_datetime(dates_aware, utc=True) - ex_vals = np.array([Timestamp(x)._as_unit("ns").value for x in dates_aware]) + ex_vals = np.array([Timestamp(x).as_unit("ns").value for x in dates_aware]) tm.assert_numpy_array_equal(converted.asi8, ex_vals) assert converted.tz is pytz.utc diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 5d0f5871b708a..3c841d829efd7 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -383,7 +383,7 @@ def test_encode_as_null(self, decoded_input): def test_datetime_units(self): val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) - stamp = Timestamp(val)._as_unit("ns") + stamp = Timestamp(val).as_unit("ns") roundtrip = ujson.decode(ujson.encode(val, date_unit="s")) assert roundtrip == stamp.value // 10**9 diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py 
index 6e9a984603cbf..112f23b3b0f16 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -821,7 +821,7 @@ def test_end_time(self): p = Period("2012", freq="A") def _ex(*args): - return Timestamp(Timestamp(datetime(*args))._as_unit("ns").value - 1) + return Timestamp(Timestamp(datetime(*args)).as_unit("ns").value - 1) xp = _ex(2013, 1, 1) assert xp == p.end_time @@ -873,7 +873,7 @@ def test_end_time_business_friday(self): def test_anchor_week_end_time(self): def _ex(*args): - return Timestamp(Timestamp(datetime(*args))._as_unit("ns").value - 1) + return Timestamp(Timestamp(datetime(*args)).as_unit("ns").value - 1) p = Period("2013-1-1", "W-SAT") xp = _ex(2013, 1, 6) diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 41dd143ec14b4..0ddbdddef5465 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -198,7 +198,7 @@ def test_radd_tdscalar(self, td, fixed_now_ts): ) def test_timestamp_add_timedelta64_unit(self, other, expected_difference): now = datetime.utcnow() - ts = Timestamp(now)._as_unit("ns") + ts = Timestamp(now).as_unit("ns") result = ts + other valdiff = result.value - ts.value assert valdiff == expected_difference diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 9e6a438d033ea..a7452ac73c6a2 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -99,9 +99,7 @@ def test_constructor(self): for date_str, date, expected in tests: for result in [Timestamp(date_str), Timestamp(date)]: - result = result._as_unit( - "ns" - ) # test originally written before non-nano + result = result.as_unit("ns") # test originally written before non-nano # only with timestring assert result.value == expected @@ -112,7 +110,7 @@ def test_constructor(self): # with timezone for tz, offset in timezones: for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: - result = result._as_unit( + result = result.as_unit( "ns" ) # test originally written before non-nano expected_tz = expected - offset * 3600 * 1_000_000_000 diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 8e315beb4ee9c..c20e6052b1f7e 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -580,8 +580,7 @@ def test_to_datetime_bijective(self): pydt_max = Timestamp.max.to_pydatetime() assert ( - Timestamp(pydt_max)._as_unit("ns").value / 1000 - == Timestamp.max.value / 1000 + Timestamp(pydt_max).as_unit("ns").value / 1000 == Timestamp.max.value / 1000 ) exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning @@ -594,7 +593,7 @@ def test_to_datetime_bijective(self): assert pydt_min + tdus > Timestamp.min assert ( - Timestamp(pydt_min + tdus)._as_unit("ns").value / 1000 + Timestamp(pydt_min + tdus).as_unit("ns").value / 1000 == Timestamp.min.value / 1000 ) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a051833867493..adc94c06ce042 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -654,7 +654,7 @@ def test_to_datetime_today(self, tz): pdtoday2 = to_datetime(["today"])[0] tstoday = Timestamp("today") - tstoday2 = Timestamp.today()._as_unit("ns") + tstoday2 = 
From 40f28a1153b15ebb3a2b981ea73e17730657653d Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 16 Nov 2022 15:55:11 -0800
Subject: [PATCH 15/22] suggestion json validation

---
 pandas/_libs/src/ujson/python/objToJSON.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 5fa268dd3884b..190fd5656fde0 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -294,15 +294,16 @@ static npy_int64 get_long_attr(PyObject *o, const char *attr) {
 
     // ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit
     PyObject* reso = PyObject_GetAttrString(o, "_creso");
-    // if (!PyLong_Check(reso)) {
-    //     TODO(anyone): we should have error handling here, but one step at a time
-    // }
+    if (!PyLong_Check(reso)) {
+        // https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139
+        return -1;
+    }
 
     long cReso = PyLong_AsLong(reso);
-    // if (cReso == -1 && PyErr_Occurred()) {
-    //     TODO(anyone): we should have error handling here, but one step at a time
-    // }
     Py_DECREF(reso);
+    if (cReso == -1 && PyErr_Occurred()) {
+        return -1;
+    }
 
     if (cReso == NPY_FR_us) {
         long_val = long_val * 1000L;

From 83cf179394d95fd2522423ddb6c52208864b5523 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 17 Nov 2022 10:12:12 -0800
Subject: [PATCH 16/22] extra Py_DECREF

---
 pandas/_libs/src/ujson/python/objToJSON.c | 1 +
 pandas/_libs/tslib.pyx                    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 190fd5656fde0..212d437f542d7 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -296,6 +296,7 @@ static npy_int64 get_long_attr(PyObject *o, const char *attr) {
     PyObject* reso = PyObject_GetAttrString(o, "_creso");
     if (!PyLong_Check(reso)) {
         // https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139
+        Py_DECREF(reso);
         return -1;
     }
 
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 3ea1f49b33665..7eda5bb73348b 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -550,7 +550,7 @@ cpdef array_to_datetime(
                         raise ValueError('Cannot mix tz-aware with '
                                          'tz-naive values')
                     if isinstance(val, _Timestamp):
-                        iresult[i] = (<_Timestamp>val)._as_creso(NPY_FR_ns).value
+                        iresult[i] = val.as_unit("ns").value
                     else:
                         iresult[i] = pydatetime_to_dt64(val, &dts)
                         check_dts_bounds(&dts)

From 0eafbd55d7c228f3112e7b418ba04edc2a049e16 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 17 Nov 2022 13:52:51 -0800
Subject: [PATCH 17/22] requested refactor

---
 pandas/_libs/src/ujson/python/objToJSON.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 212d437f542d7..d4ec21f38cdad 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -287,8 +287,8 @@ static npy_int64 get_long_attr(PyObject *o, const char *attr) {
 
     Py_DECREF(value);
 
-    if (long_val == NPY_MIN_INT64) {
-        // i.e. o is NaT
+    if (object_is_nat_type(o)) {
+        // i.e. o is NaT, long_val will be NPY_MIN_INT64
         return long_val;
     }
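
The three C commits above harden `get_long_attr`: `_creso` is validated before use, the new reference from `PyObject_GetAttrString` is released on every path, and NaT is detected by type rather than by the `NPY_MIN_INT64` sentinel. The scaling the function performs is simple; a rough Python analogue (this is not the ujson code path, and the scale table is an assumption extrapolated from the `NPY_FR_us` branch visible above):

    import pandas as pd

    # JSON output wants epoch nanoseconds, while Timestamp.value is denominated
    # in Timestamp.unit, so coarser resolutions get multiplied up
    SCALE_TO_NS = {"s": 1_000_000_000, "ms": 1_000_000, "us": 1_000, "ns": 1}

    def value_as_ns(ts: pd.Timestamp) -> int:
        return ts.value * SCALE_TO_NS[ts.unit]

    ts = pd.Timestamp("2013-08-17 21:17:12.215504")
    assert value_as_ns(ts) == ts.as_unit("ns").value
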
From c7c0ceeb84bf7de0c9e5dc6c15f400a5686d6a55 Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 22 Nov 2022 14:55:58 -0800
Subject: [PATCH 18/22] fix doctest

---
 pandas/core/arrays/datetimes.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 3da3ffda3586c..5147fddd7b471 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -445,7 +445,7 @@ def _generate_range(  # type: ignore[override]
             i8values = generate_regular_range(start, end, periods, freq, unit=unit)
         else:
             xdr = _generate_range(
-                start=start, end=end, periods=periods, offset=freq
+                start=start, end=end, periods=periods, offset=freq, unit=unit
             )
             i8values = np.array([x.value for x in xdr], dtype=np.int64)
 
@@ -2489,6 +2489,8 @@ def _generate_range(
     end: Timestamp | None,
     periods: int | None,
     offset: BaseOffset,
+    *,
+    unit: str,
 ):
     """
     Generates a sequence of dates corresponding to the specified time
@@ -2500,7 +2502,8 @@
     start : Timestamp or None
     end : Timestamp or None
     periods : int or None
-    offset : DateOffset,
+    offset : DateOffset
+    unit : str
 
     Notes
     -----
@@ -2521,7 +2524,7 @@
     # Non-overlapping identity check (left operand type: "Timestamp", right
     # operand type: "NaTType")
    if start is not NaT:  # type: ignore[comparison-overlap]
-        start = start.as_unit("ns")
+        start = start.as_unit(unit)
     else:
         start = None
 
@@ -2531,7 +2534,7 @@
     # Non-overlapping identity check (left operand type: "Timestamp", right
     # operand type: "NaTType")
     if end is not NaT:  # type: ignore[comparison-overlap]
-        end = end.as_unit("ns")
+        end = end.as_unit(unit)
     else:
         end = None
 
@@ -2574,7 +2577,7 @@
                 break
 
             # faster than cur + offset
-            next_date = offset._apply(cur).as_unit("ns")
+            next_date = offset._apply(cur).as_unit(unit)
             if next_date <= cur:
                 raise ValueError(f"Offset {offset} did not increment date")
             cur = next_date
@@ -2588,7 +2591,7 @@
                 break
 
             # faster than cur + offset
-            next_date = offset._apply(cur).as_unit("ns")
+            next_date = offset._apply(cur).as_unit(unit)
             if next_date >= cur:
                 raise ValueError(f"Offset {offset} did not decrement date")
             cur = next_date
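
With `unit` threaded through `_generate_range`, ranges built by repeatedly applying an offset are produced directly at the requested resolution instead of at hard-coded nanoseconds. A sketch of the user-visible effect, assuming the public `date_range` exposes the same `unit` keyword (as it does in pandas 2.0):

    import pandas as pd

    # "B" (business day) is an irregular offset, so this goes through the
    # _generate_range path rather than the regular-range integer fast path
    dti = pd.date_range("2009-03-25", periods=2, freq="B", unit="s")
    assert str(dti.dtype) == "datetime64[s]"
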
unit="ns")) + rng2 = list(generate_range(START, END, periods=None, offset="C", unit="ns")) assert rng1 == rng2 def test_1(self): rng = list( generate_range( - start=datetime(2009, 3, 25), end=None, periods=2, offset=BDay() + start=datetime(2009, 3, 25), + end=None, + periods=2, + offset=BDay(), + unit="ns", ) ) expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)] @@ -872,6 +876,7 @@ def test_2(self): end=datetime(2008, 1, 3), periods=None, offset=BDay(), + unit="ns", ) ) expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)] @@ -884,6 +889,7 @@ def test_3(self): end=datetime(2008, 1, 6), periods=None, offset=BDay(), + unit="ns", ) ) expected = [] From 6975db1dedb61caf7b0187e4dbed3f2030613487 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Nov 2022 14:05:16 -0800 Subject: [PATCH 20/22] Update pandas/_libs/tslibs/conversion.pyx Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/_libs/tslibs/conversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 9bbffed921849..17facf9e16f4b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -305,7 +305,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's - # For date object we give the lowest supporte resolution, ie. "s" + # For date object we give the lowest supported resolution, i.e. "s" ts = datetime.combine(ts, time()) return convert_datetime_to_tsobject( ts, tz, nanos=0, reso=NPY_DATETIMEUNIT.NPY_FR_s From c15675d6763534cb2a201ae42e230ee4084bb430 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Nov 2022 14:11:09 -0800 Subject: [PATCH 21/22] dedicate pydate reso test --- pandas/tests/scalar/timestamp/test_constructors.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index a7452ac73c6a2..4294bf326950c 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -1,5 +1,6 @@ import calendar from datetime import ( + date, datetime, timedelta, timezone, @@ -23,6 +24,13 @@ class TestTimestampConstructors: + def test_constructor_from_date_second_reso(self): + # GH#49034 constructing from a pydate object gets lowest supported + # reso, i.e. 
From c15675d6763534cb2a201ae42e230ee4084bb430 Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 29 Nov 2022 14:11:09 -0800
Subject: [PATCH 21/22] dedicate pydate reso test

---
 pandas/tests/scalar/timestamp/test_constructors.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
index a7452ac73c6a2..4294bf326950c 100644
--- a/pandas/tests/scalar/timestamp/test_constructors.py
+++ b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -1,5 +1,6 @@
 import calendar
 from datetime import (
+    date,
     datetime,
     timedelta,
     timezone,
@@ -23,6 +24,13 @@ class TestTimestampConstructors:
+    def test_constructor_from_date_second_reso(self):
+        # GH#49034 constructing from a pydate object gets lowest supported
+        # reso, i.e. seconds
+        obj = date(2012, 9, 1)
+        ts = Timestamp(obj)
+        assert ts.unit == "s"
+
     @pytest.mark.parametrize("typ", [int, float])
     def test_constructor_int_float_with_YM_unit(self, typ):
         # GH#47266 avoid the conversions in cast_from_unit
@@ -97,8 +105,8 @@ def test_constructor(self):
             (dateutil.tz.tzoffset(None, 18000), 5),
         ]
 
-        for date_str, date, expected in tests:
-            for result in [Timestamp(date_str), Timestamp(date)]:
+        for date_str, date_obj, expected in tests:
+            for result in [Timestamp(date_str), Timestamp(date_obj)]:
                 result = result.as_unit("ns")  # test originally written before non-nano
                 # only with timestring
                 assert result.value == expected
@@ -109,7 +117,7 @@
 
                 # with timezone
                 for tz, offset in timezones:
-                    for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]:
+                    for result in [Timestamp(date_str, tz=tz), Timestamp(date_obj, tz=tz)]:
                         result = result.as_unit(
                             "ns"
                         )  # test originally written before non-nano

From 8155f2a94e6fbfe8efad71b8e2a25c6691ce8d0c Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 30 Nov 2022 09:07:41 -0800
Subject: [PATCH 22/22] fix failing resample test

---
 pandas/core/indexes/datetimes.py | 3 ++-
 pandas/core/resample.py          | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 022476af1e173..c30323338e676 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -9,6 +9,7 @@
 import warnings
 
 import numpy as np
+import pytz
 
 from pandas._libs import (
     NaT,
@@ -578,7 +579,7 @@ def get_loc(self, key, method=None, tolerance=None):
 
             try:
                 parsed, reso = self._parse_with_reso(key)
-            except ValueError as err:
+            except (ValueError, pytz.NonExistentTimeError) as err:
                 raise KeyError(key) from err
             self._disallow_mismatched_indexing(parsed)
 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index f5c76aade9956..f5aeb61df633a 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -2074,6 +2074,11 @@ def _adjust_dates_anchored(
     # not a multiple of the frequency. See GH 8683
     # To handle frequencies that are not multiple or divisible by a day we let
     # the possibility to define a fixed origin timestamp. See GH 31809
+    first = first.as_unit("ns")
+    last = last.as_unit("ns")
+    if offset is not None:
+        offset = offset.as_unit("ns")
+
     origin_nanos = 0  # origin == "epoch"
     if origin == "start_day":
         origin_nanos = first.normalize().value
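
The resample fix works because `_adjust_dates_anchored` does raw `.value` arithmetic, which is only meaningful when every operand is denominated in the same unit; converting `first`, `last`, and `offset` to nanoseconds up front restores that invariant for non-nano indexes. A minimal sketch of the invariant (illustrative values, assuming pandas 2.0+):

    import pandas as pd

    first = pd.Timestamp("2000-01-01 06:00").as_unit("s")
    last = pd.Timestamp("2000-01-02 06:00").as_unit("ns")

    # mixing second- and nanosecond-denominated integers gives nonsense...
    assert last.value - first.value != pd.Timedelta("1D").value

    # ...but the arithmetic is consistent once both use a common unit
    assert last.as_unit("ns").value - first.as_unit("ns").value == pd.Timedelta("1D").value
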