Skip to content

API: Timestamp(pydatetime) microsecond reso #49034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 41 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
261719c
API: Timedelta(td64_obj) retain resolution
jbrockmendel Oct 1, 2022
a8c6906
BUG: preserve DTA/TDA+timedeltalike scalar with mismatched resos
jbrockmendel Oct 3, 2022
6f5d4b5
BUG: DatetimeArray-datetimelike mixed resos
jbrockmendel Sep 30, 2022
ad51d10
API: Timestamp(pydatetime) microsecond reso
jbrockmendel Oct 10, 2022
74105d8
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 11, 2022
43436ce
use willayd suggestion
jbrockmendel Oct 11, 2022
dad131f
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 12, 2022
8802add
ci fixup
jbrockmendel Oct 12, 2022
4c6f0f6
mypy fixup
jbrockmendel Oct 12, 2022
5c18738
ignore pyright
jbrockmendel Oct 12, 2022
17682b5
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 12, 2022
aeadbdc
fix doctest
jbrockmendel Oct 12, 2022
382c46e
un-xfail
jbrockmendel Oct 13, 2022
85aba3f
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 13, 2022
f8cef09
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 14, 2022
bc6f014
Merge main follow-up
jbrockmendel Oct 14, 2022
343954f
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 17, 2022
7f8db31
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 31, 2022
25db552
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 10, 2022
fe8c444
s reso for pydate
jbrockmendel Nov 10, 2022
d5e94d1
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 10, 2022
7717c10
typo fixup
jbrockmendel Nov 10, 2022
b4ebc62
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 15, 2022
06945fc
post-merge fixups
jbrockmendel Nov 15, 2022
bf9705a
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 16, 2022
40f28a1
suggestion json validation
jbrockmendel Nov 16, 2022
e32996e
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 17, 2022
83cf179
extra Py_DECREF
jbrockmendel Nov 17, 2022
0eafbd5
requested refactor
jbrockmendel Nov 17, 2022
072eaee
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 22, 2022
fd0125d
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 22, 2022
c7c0cee
fix doctest
jbrockmendel Nov 22, 2022
eab61b9
unit keyword
jbrockmendel Nov 23, 2022
afe09bb
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 23, 2022
257276d
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 28, 2022
6975db1
Update pandas/_libs/tslibs/conversion.pyx
jbrockmendel Nov 29, 2022
a640de0
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 29, 2022
c15675d
dedicate pydate reso test
jbrockmendel Nov 29, 2022
be83f35
Merge branch 'nano-tstamp-pydatetime' of github.com:jbrockmendel/pand…
jbrockmendel Nov 29, 2022
8155f2a
fix failing resample test
jbrockmendel Nov 30, 2022
497ee62
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 30, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,16 @@ from pandas._libs.missing cimport (
is_null_datetime64,
is_null_timedelta64,
)
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
from pandas._libs.tslibs.conversion cimport (
_TSObject,
convert_to_tsobject,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
checknull_with_nat,
)
from pandas._libs.tslibs.np_datetime cimport NPY_FR_ns
from pandas._libs.tslibs.offsets cimport is_offset_object
from pandas._libs.tslibs.period cimport is_period_object
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
Expand Down Expand Up @@ -2452,6 +2456,7 @@ def maybe_convert_objects(ndarray[object] objects,
Seen seen = Seen()
object val
float64_t fval, fnan = np.nan
_TSObject tsobj

n = len(objects)

Expand Down Expand Up @@ -2545,8 +2550,9 @@ def maybe_convert_objects(ndarray[object] objects,
else:
seen.datetime_ = True
try:
idatetimes[i] = convert_to_tsobject(
val, None, None, 0, 0).value
tsobj = convert_to_tsobject(val, None, None, 0, 0)
tsobj.ensure_reso(NPY_FR_ns)
idatetimes[i] = tsobj.value
except OutOfBoundsDatetime:
seen.object_ = True
break
Expand Down
30 changes: 30 additions & 0 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,11 +318,40 @@ static int is_simple_frame(PyObject *obj) {
}

static npy_int64 get_long_attr(PyObject *o, const char *attr) {
    // Return the named integer attribute of o, scaled to nanoseconds.
    // NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT;
    // on error a Python exception is set and -1 is returned, so callers must
    // check PyErr_Occurred() when they receive -1.

    npy_int64 long_val;
    PyObject *value = PyObject_GetAttrString(o, attr);
    if (value == NULL) {
        return -1;
    }
    long_val =
        (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value));

    Py_DECREF(value);

    if (long_val == NPY_MIN_INT64) {
        // i.e. o is NaT; propagate the sentinel unscaled
        return long_val;
    }

    // ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit
    PyObject *reso = PyObject_GetAttrString(o, "_creso");
    if (reso == NULL) {
        return -1;
    }
    if (!PyLong_Check(reso)) {
        // avoid leaking the attribute reference on the error path
        Py_DECREF(reso);
        return -1;
    }

    long cReso = PyLong_AsLong(reso);
    Py_DECREF(reso);
    if (cReso == -1 && PyErr_Occurred()) {
        return -1;
    }

    // scale from the Timestamp/Timedelta unit up to nanoseconds
    if (cReso == NPY_FR_us) {
        long_val = long_val * 1000L;
    } else if (cReso == NPY_FR_ms) {
        long_val = long_val * 1000000L;
    } else if (cReso == NPY_FR_s) {
        long_val = long_val * 1000000000L;
    }

    return long_val;
}

Expand Down Expand Up @@ -1305,6 +1334,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
} else if (PyDate_Check(item) || PyDelta_Check(item)) {
is_datetimelike = 1;
if (PyObject_HasAttrString(item, "value")) {
// see test_date_index_and_values for case with non-nano
nanosecVal = get_long_attr(item, "value");
} else {
if (PyDelta_Check(item)) {
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ cpdef array_to_datetime(
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
if isinstance(val, _Timestamp):
iresult[i] = val._as_unit("ns").value
iresult[i] = (<_Timestamp>val)._as_creso(NPY_FR_ns).value
else:
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
Expand Down Expand Up @@ -855,7 +855,7 @@ cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc):
# We delay this check for as long as possible
# because it catches relatively rare cases
if val == "now":
iresult[0] = Timestamp.utcnow().value
iresult[0] = Timestamp.utcnow().value * 1000 # *1000 to convert to nanos
if not utc:
# GH#18705 make sure to_datetime("now") matches Timestamp("now")
warnings.warn(
Expand All @@ -868,6 +868,6 @@ cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc):

return True
elif val == "today":
iresult[0] = Timestamp.today().value
iresult[0] = Timestamp.today().value * 1000 # *1000 to convert to nanos
return True
return False
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ cdef class _TSObject:
bint fold
NPY_DATETIMEUNIT creso

cdef void ensure_reso(self, NPY_DATETIMEUNIT creso)
cdef ensure_reso(self, NPY_DATETIMEUNIT creso)


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand Down
23 changes: 19 additions & 4 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ from pandas._libs.tslibs.dtypes cimport (
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
NPY_FR_us,
check_dts_bounds,
convert_reso,
get_datetime64_unit,
Expand Down Expand Up @@ -212,9 +213,14 @@ cdef class _TSObject:
self.fold = 0
self.creso = NPY_FR_ns # default value

cdef void ensure_reso(self, NPY_DATETIMEUNIT creso):
cdef ensure_reso(self, NPY_DATETIMEUNIT creso):
    """
    Convert self.value in place to the given resolution, updating
    self.creso to match.

    Raises
    ------
    OutOfBoundsDatetime
        If the value cannot be represented in the target resolution.
    """
    if self.creso != creso:
        try:
            # round_ok=False: a lossy conversion here would silently
            # change the timestamp, so overflow/rounding must raise
            self.value = convert_reso(self.value, self.creso, creso, False)
        except OverflowError as err:
            raise OutOfBoundsDatetime from err

        self.creso = creso


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand Down Expand Up @@ -292,11 +298,19 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
obj.value = ts
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
elif PyDateTime_Check(ts):
return convert_datetime_to_tsobject(ts, tz, nanos)
if nanos == 0:
if isinstance(ts, ABCTimestamp):
reso = abbrev_to_npy_unit(ts._unit) # TODO: faster way to do this?
else:
# TODO: what if user explicitly passes nanos=0?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to hit this? Maybe we should raise instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think it could happen with pd.Timestamp(pydatetime_obj, nanosecond=0)

reso = NPY_FR_us
else:
reso = NPY_FR_ns
return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso)
elif PyDate_Check(ts):
# Keep the converter same as PyDateTime's
ts = datetime.combine(ts, time())
return convert_datetime_to_tsobject(ts, tz)
return convert_datetime_to_tsobject(ts, tz, nanos=0, reso=NPY_FR_us) # TODO: or lower?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO I would cast dates to the lowest resolution (and document it)

else:
from .period import Period
if isinstance(ts, Period):
Expand Down Expand Up @@ -350,6 +364,7 @@ cdef _TSObject convert_datetime_to_tsobject(
_TSObject obj = _TSObject()
int64_t pps

obj.creso = reso
obj.fold = ts.fold
if tz is not None:
tz = maybe_get_tz(tz)
Expand Down
8 changes: 5 additions & 3 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ def apply_wraps(func):

result = func(self, other)

result = Timestamp(result)
result = (<_Timestamp>Timestamp(result))._as_creso(other._creso)

if self._adjust_dst:
result = result.tz_localize(tz)

Expand All @@ -178,9 +179,10 @@ def apply_wraps(func):
if result.nanosecond != nano:
if result.tz is not None:
# convert to UTC
value = result.tz_localize(None).value
res = result.tz_localize(None)
else:
value = result.value
res = result
value = res._as_unit("ns").value
result = Timestamp(value + nano)

if tz is not None and result.tzinfo is None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timestamps.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ cdef class _Timestamp(ABCTimestamp):
cpdef void _set_freq(self, freq)
cdef _warn_on_field_deprecation(_Timestamp self, freq, str field)
cdef bint _compare_mismatched_resos(_Timestamp self, _Timestamp other, int op)
cdef _Timestamp _as_creso(_Timestamp self, NPY_DATETIMEUNIT reso, bint round_ok=*)
cdef _Timestamp _as_creso(_Timestamp self, NPY_DATETIMEUNIT creso, bint round_ok=*)
21 changes: 14 additions & 7 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -525,9 +525,9 @@ cdef class _Timestamp(ABCTimestamp):
# Matching numpy, we cast to the higher resolution. Unlike numpy,
# we raise instead of silently overflowing during this casting.
if self._creso < other._creso:
self = (<_Timestamp>self)._as_creso(other._creso, round_ok=False)
self = (<_Timestamp>self)._as_creso(other._creso, round_ok=True)
elif self._creso > other._creso:
other = (<_Timestamp>other)._as_creso(self._creso, round_ok=False)
other = (<_Timestamp>other)._as_creso(self._creso, round_ok=True)

# scalar Timestamp/datetime - Timestamp/datetime -> yields a
# Timedelta
Expand Down Expand Up @@ -1062,15 +1062,22 @@ cdef class _Timestamp(ABCTimestamp):
# Conversion Methods

@cython.cdivision(False)
cdef _Timestamp _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
cdef _Timestamp _as_creso(self, NPY_DATETIMEUNIT creso, bint round_ok=True):
    """
    Return a new Timestamp with the given resolution, keeping tzinfo.

    Parameters
    ----------
    creso : NPY_DATETIMEUNIT
        Target resolution.
    round_ok : bint, default True
        If False, raise when the conversion would require rounding.

    Raises
    ------
    OutOfBoundsDatetime
        If the value overflows in the target resolution.
    """
    cdef:
        int64_t value

    # no-op fast path: already at the requested resolution
    if creso == self._creso:
        return self

    try:
        value = convert_reso(self.value, self._creso, creso, round_ok=round_ok)
    except OverflowError as err:
        # translate to the pandas-facing exception with a readable unit name
        unit = npy_unit_to_abbrev(creso)
        raise OutOfBoundsDatetime(
            f"Cannot cast {self} to unit='{unit}' without overflow."
        ) from err

    return type(self)._from_value_and_reso(value, reso=creso, tz=self.tzinfo)

def _as_unit(self, str unit, bint round_ok=True):
dtype = np.dtype(f"M8[{unit}]")
Expand All @@ -1091,7 +1098,7 @@ cdef class _Timestamp(ABCTimestamp):
--------
>>> ts = pd.Timestamp(2020, 3, 14, 15)
>>> ts.asm8
numpy.datetime64('2020-03-14T15:00:00.000000000')
numpy.datetime64('2020-03-14T15:00:00.000000')
"""
return self.to_datetime64()

Expand Down
7 changes: 7 additions & 0 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,14 @@ def wrapper(
if out_dtype is not None:
out = out.view(out_dtype)
if fill_wrap is not None:
# FIXME: if we get here with dt64/td64 we need to be sure we have
# matching resos
if fill_value.dtype.kind == "m":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worth making a re-usable function for this? I could see this useful in other areas. Something like reso_for_type

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'd like to hold off on that as out of scope

fill_value = fill_value.astype("m8[ns]")
else:
fill_value = fill_value.astype("M8[ns]")
fill_value = fill_wrap(fill_value)

f(arr, indexer, out, fill_value=fill_value)

return wrapper
Expand Down
26 changes: 21 additions & 5 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,11 @@ def _generate_range( # type: ignore[override]
if start is NaT or end is NaT:
raise ValueError("Neither `start` nor `end` can be NaT")

if start is not None:
start = start._as_unit("ns")
if end is not None:
end = end._as_unit("ns")

left_inclusive, right_inclusive = validate_inclusive(inclusive)
start, end = _maybe_normalize_endpoints(start, end, normalize)
tz = _infer_tz_from_endpoints(start, end, tz)
Expand Down Expand Up @@ -477,7 +482,11 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64:
if not isinstance(value, self._scalar_type) and value is not NaT:
raise ValueError("'value' should be a Timestamp.")
self._check_compatible_with(value, setitem=setitem)
return value.asm8

if value is NaT:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible I've missed this conversation but do we need to give consideration to a generic NaT type that can hold different precisions? Or are we always going to use numpy's value?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to give consideration to a generic NaT type that can hold different precisions

The closest I've seen to this has been a discussion of having a separate NaT-like for timedelta. I'm not aware of any discussion of a resolution-specific NaT.

return np.datetime64(value.value, self._unit)
else:
return value._as_unit(self._unit).asm8

def _scalar_from_string(self, value) -> Timestamp | NaTType:
    # Parse a datetime string into a Timestamp localized to this array's tz.
    return Timestamp(value, tz=self.tz)
Expand Down Expand Up @@ -2557,13 +2566,20 @@ def _generate_range(
start = Timestamp(start) # type: ignore[arg-type]
# Non-overlapping identity check (left operand type: "Timestamp", right
# operand type: "NaTType")
start = start if start is not NaT else None # type: ignore[comparison-overlap]
if start is not NaT: # type: ignore[comparison-overlap]
start = start._as_unit("ns")
else:
start = None

# Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
# expected "Union[integer[Any], float, str, date, datetime64]"
end = Timestamp(end) # type: ignore[arg-type]
# Non-overlapping identity check (left operand type: "Timestamp", right
# operand type: "NaTType")
end = end if end is not NaT else None # type: ignore[comparison-overlap]
if end is not NaT: # type: ignore[comparison-overlap]
end = end._as_unit("ns")
else:
end = None

if start and not offset.is_on_offset(start):
# Incompatible types in assignment (expression has type "datetime",
Expand Down Expand Up @@ -2604,7 +2620,7 @@ def _generate_range(
break

# faster than cur + offset
next_date = offset._apply(cur)
next_date = offset._apply(cur)._as_unit("ns")
if next_date <= cur:
raise ValueError(f"Offset {offset} did not increment date")
cur = next_date
Expand All @@ -2618,7 +2634,7 @@ def _generate_range(
break

# faster than cur + offset
next_date = offset._apply(cur)
next_date = offset._apply(cur)._as_unit("ns")
if next_date >= cur:
raise ValueError(f"Offset {offset} did not decrement date")
cur = next_date
3 changes: 3 additions & 0 deletions pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np

from pandas._libs.tslibs import (
NaT,
Timedelta,
Timestamp,
)
Expand Down Expand Up @@ -216,6 +217,8 @@ def stringify(value):
v = stringify(v)
v = ensure_decoded(v)
v = Timestamp(v)
if v is not NaT:
v = v._as_unit("ns") # pyright: ignore[reportGeneralTypeIssues]
if v.tz is not None:
v = v.tz_convert("UTC")
return TermValue(v, v.value, kind)
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,16 +795,21 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
elif isinstance(val, (np.datetime64, datetime)):
try:
val = Timestamp(val)
# error: Non-overlapping identity check (left operand type:
# "Timestamp", right operand type: "NaTType")
if val is not NaT: # type: ignore[comparison-overlap]
val = val._as_unit("ns")
except OutOfBoundsDatetime:
return _dtype_obj, val

# error: Non-overlapping identity check (left operand type: "Timestamp",
# right operand type: "NaTType")
if val is NaT or val.tz is None: # type: ignore[comparison-overlap]
dtype = np.dtype("M8[ns]")
val = val.to_datetime64()
dtype = val.dtype
# TODO(2.0): this should be dtype = val.dtype
# to get the correct M8 resolution
# TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
else:
if pandas_dtype:
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def parse_dates_safe(
d = {}
if is_datetime64_dtype(dates.dtype):
if delta:
time_delta = dates - stata_epoch
time_delta = dates - Timestamp(stata_epoch)._as_unit("ns")
d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds
if days or year:
date_index = DatetimeIndex(dates)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
import numpy as np
import pytest

from pandas._libs.tslibs import tz_compare
from pandas._libs.tslibs.dtypes import (
NpyDatetimeUnit,
from pandas._libs.tslibs import (
npy_unit_to_abbrev,
tz_compare,
)
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

from pandas.core.dtypes.dtypes import DatetimeTZDtype

Expand Down
Loading