Skip to content

API: Timestamp(pydatetime) microsecond reso #49034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 41 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
261719c
API: Timedelta(td64_obj) retain resolution
jbrockmendel Oct 1, 2022
a8c6906
BUG: preserve DTA/TDA+timedeltalike scalar with mismatched resos
jbrockmendel Oct 3, 2022
6f5d4b5
BUG: DatetimeArray-datetimelike mixed resos
jbrockmendel Sep 30, 2022
ad51d10
API: Timestamp(pydatetime) microsecond reso
jbrockmendel Oct 10, 2022
74105d8
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 11, 2022
43436ce
use willayd suggestion
jbrockmendel Oct 11, 2022
dad131f
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 12, 2022
8802add
ci fixup
jbrockmendel Oct 12, 2022
4c6f0f6
mypy fixup
jbrockmendel Oct 12, 2022
5c18738
ignore pyright
jbrockmendel Oct 12, 2022
17682b5
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 12, 2022
aeadbdc
fix doctest
jbrockmendel Oct 12, 2022
382c46e
un-xfail
jbrockmendel Oct 13, 2022
85aba3f
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 13, 2022
f8cef09
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 14, 2022
bc6f014
Merge main follow-up
jbrockmendel Oct 14, 2022
343954f
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 17, 2022
7f8db31
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Oct 31, 2022
25db552
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 10, 2022
fe8c444
s reso for pydate
jbrockmendel Nov 10, 2022
d5e94d1
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 10, 2022
7717c10
typo fixup
jbrockmendel Nov 10, 2022
b4ebc62
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 15, 2022
06945fc
post-merge fixups
jbrockmendel Nov 15, 2022
bf9705a
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 16, 2022
40f28a1
suggestion json validation
jbrockmendel Nov 16, 2022
e32996e
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 17, 2022
83cf179
extra Py_DECREF
jbrockmendel Nov 17, 2022
0eafbd5
requested refactor
jbrockmendel Nov 17, 2022
072eaee
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 22, 2022
fd0125d
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 22, 2022
c7c0cee
fix doctest
jbrockmendel Nov 22, 2022
eab61b9
unit keyword
jbrockmendel Nov 23, 2022
afe09bb
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 23, 2022
257276d
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 28, 2022
6975db1
Update pandas/_libs/tslibs/conversion.pyx
jbrockmendel Nov 29, 2022
a640de0
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 29, 2022
c15675d
dedicate pydate reso test
jbrockmendel Nov 29, 2022
be83f35
Merge branch 'nano-tstamp-pydatetime' of github.com:jbrockmendel/pand…
jbrockmendel Nov 29, 2022
8155f2a
fix failing resample test
jbrockmendel Nov 30, 2022
497ee62
Merge branch 'main' into nano-tstamp-pydatetime
jbrockmendel Nov 30, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,16 @@ from pandas._libs.missing cimport (
is_null_datetime64,
is_null_timedelta64,
)
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
from pandas._libs.tslibs.conversion cimport (
_TSObject,
convert_to_tsobject,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
checknull_with_nat,
)
from pandas._libs.tslibs.np_datetime cimport NPY_FR_ns
from pandas._libs.tslibs.offsets cimport is_offset_object
from pandas._libs.tslibs.period cimport is_period_object
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
Expand Down Expand Up @@ -2378,6 +2382,7 @@ def maybe_convert_objects(ndarray[object] objects,
ndarray[uint8_t] bools
Seen seen = Seen()
object val
_TSObject tsobj
float64_t fnan = np.nan

if dtype_if_all_nat is not None:
Expand Down Expand Up @@ -2470,7 +2475,8 @@ def maybe_convert_objects(ndarray[object] objects,
else:
seen.datetime_ = True
try:
convert_to_tsobject(val, None, None, 0, 0)
tsobj = convert_to_tsobject(val, None, None, 0, 0)
tsobj.ensure_reso(NPY_FR_ns)
except OutOfBoundsDatetime:
seen.object_ = True
break
Expand Down
32 changes: 32 additions & 0 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,42 @@ static int is_simple_frame(PyObject *obj) {
}

/*
 * Fetch integer attribute `attr` from `o` and return it scaled to
 * nanoseconds, based on the object's `_creso` (numpy datetime unit code).
 *
 * NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT;
 * callers guard with PyObject_HasAttrString(o, "value") before calling, so
 * the attribute lookup is presumed to succeed — TODO confirm no NULL path.
 *
 * Returns -1 on failure (with a Python exception set); note -1 is also a
 * legal attribute value, so callers must check PyErr_Occurred() to
 * disambiguate.
 */
static npy_int64 get_long_attr(PyObject *o, const char *attr) {
    npy_int64 long_val;
    PyObject *value = PyObject_GetAttrString(o, attr);
    /* Use the long-long path for true Python ints to avoid truncation on
       platforms where `long` is 32-bit. */
    long_val =
        (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value));

    Py_DECREF(value);

    if (object_is_nat_type(o)) {
        // i.e. o is NaT, long_val will be NPY_MIN_INT64; NaT carries no
        // resolution, so return the sentinel unscaled.
        return long_val;
    }

    // ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit
    PyObject* reso = PyObject_GetAttrString(o, "_creso");
    if (!PyLong_Check(reso)) {
        // _creso was not an int; bail with an error indicator rather than
        // guessing a unit. See
        // https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139
        Py_DECREF(reso);
        return -1;
    }

    long cReso = PyLong_AsLong(reso);
    Py_DECREF(reso);
    if (cReso == -1 && PyErr_Occurred()) {
        return -1;
    }

    /* Scale from the object's native unit to nanoseconds. Units coarser
       than seconds are not handled here — presumably unreachable for
       Timestamp/Timedelta, which support only s/ms/us/ns resolutions;
       NOTE(review): overflow of the multiplication is not checked. */
    if (cReso == NPY_FR_us) {
        long_val = long_val * 1000L;
    } else if (cReso == NPY_FR_ms) {
        long_val = long_val * 1000000L;
    } else if (cReso == NPY_FR_s) {
        long_val = long_val * 1000000000L;
    }

    return long_val;
}

Expand Down Expand Up @@ -1265,6 +1296,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
} else if (PyDate_Check(item) || PyDelta_Check(item)) {
is_datetimelike = 1;
if (PyObject_HasAttrString(item, "value")) {
// see test_date_index_and_values for case with non-nano
nanosecVal = get_long_attr(item, "value");
} else {
if (PyDelta_Check(item)) {
Expand Down
9 changes: 6 additions & 3 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -841,16 +841,19 @@ cdef _array_to_datetime_object(
cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc):
# We delay this check for as long as possible
# because it catches relatively rare cases

# Multiply by 1000 to convert to nanos, since these methods naturally have
# microsecond resolution
if val == "now":
if utc:
iresult[0] = Timestamp.utcnow().value
iresult[0] = Timestamp.utcnow().value * 1000
else:
# GH#18705 make sure to_datetime("now") matches Timestamp("now")
# Note using Timestamp.now() is faster than Timestamp("now")
iresult[0] = Timestamp.now().value
iresult[0] = Timestamp.now().value * 1000
return True
elif val == "today":
iresult[0] = Timestamp.today().value
iresult[0] = Timestamp.today().value * 1000
return True
return False

Expand Down
24 changes: 21 additions & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ from pandas._libs.tslibs.dtypes cimport (
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
NPY_FR_us,
check_dts_bounds,
convert_reso,
get_datetime64_unit,
Expand Down Expand Up @@ -212,7 +213,12 @@ cdef class _TSObject:

cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1:
if self.creso != creso:
self.value = convert_reso(self.value, self.creso, creso, False)
try:
self.value = convert_reso(self.value, self.creso, creso, False)
except OverflowError as err:
raise OutOfBoundsDatetime from err

self.creso = creso
return self.value


Expand Down Expand Up @@ -288,11 +294,22 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
obj.value = ts
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
elif PyDateTime_Check(ts):
return convert_datetime_to_tsobject(ts, tz, nanos)
if nanos == 0:
if isinstance(ts, ABCTimestamp):
reso = abbrev_to_npy_unit(ts.unit) # TODO: faster way to do this?
else:
# TODO: what if user explicitly passes nanos=0?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to hit this? Maybe we should raise instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think it could happen with pd.Timestamp(pydatetime_obj, nanosecond=0)

reso = NPY_FR_us
else:
reso = NPY_FR_ns
return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso)
elif PyDate_Check(ts):
# Keep the converter same as PyDateTime's
# For date object we give the lowest supported resolution, i.e. "s"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a test where we construct a Timestamp from a datetime.date?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dedicated test added + green

ts = datetime.combine(ts, time())
return convert_datetime_to_tsobject(ts, tz)
return convert_datetime_to_tsobject(
ts, tz, nanos=0, reso=NPY_DATETIMEUNIT.NPY_FR_s
)
else:
from .period import Period
if isinstance(ts, Period):
Expand Down Expand Up @@ -346,6 +363,7 @@ cdef _TSObject convert_datetime_to_tsobject(
_TSObject obj = _TSObject()
int64_t pps

obj.creso = reso
obj.fold = ts.fold
if tz is not None:
tz = maybe_get_tz(tz)
Expand Down
8 changes: 5 additions & 3 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ def apply_wraps(func):

result = func(self, other)

result = Timestamp(result)
result = (<_Timestamp>Timestamp(result))._as_creso(other._creso)

if self._adjust_dst:
result = result.tz_localize(tz)

Expand All @@ -175,9 +176,10 @@ def apply_wraps(func):
if result.nanosecond != nano:
if result.tz is not None:
# convert to UTC
value = result.tz_localize(None).value
res = result.tz_localize(None)
else:
value = result.value
res = result
value = res.as_unit("ns").value
result = Timestamp(value + nano)

if tz is not None and result.tzinfo is None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timestamps.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ cdef class _Timestamp(ABCTimestamp):
cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
int op) except -1
cdef bint _compare_mismatched_resos(_Timestamp self, _Timestamp other, int op)
cdef _Timestamp _as_creso(_Timestamp self, NPY_DATETIMEUNIT reso, bint round_ok=*)
cdef _Timestamp _as_creso(_Timestamp self, NPY_DATETIMEUNIT creso, bint round_ok=*)
21 changes: 14 additions & 7 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -497,9 +497,9 @@ cdef class _Timestamp(ABCTimestamp):
# Matching numpy, we cast to the higher resolution. Unlike numpy,
# we raise instead of silently overflowing during this casting.
if self._creso < other._creso:
self = (<_Timestamp>self)._as_creso(other._creso, round_ok=False)
self = (<_Timestamp>self)._as_creso(other._creso, round_ok=True)
elif self._creso > other._creso:
other = (<_Timestamp>other)._as_creso(self._creso, round_ok=False)
other = (<_Timestamp>other)._as_creso(self._creso, round_ok=True)

# scalar Timestamp/datetime - Timestamp/datetime -> yields a
# Timedelta
Expand Down Expand Up @@ -983,15 +983,22 @@ cdef class _Timestamp(ABCTimestamp):
# Conversion Methods

@cython.cdivision(False)
cdef _Timestamp _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
cdef _Timestamp _as_creso(self, NPY_DATETIMEUNIT creso, bint round_ok=True):
cdef:
int64_t value

if reso == self._creso:
if creso == self._creso:
return self

value = convert_reso(self.value, self._creso, reso, round_ok=round_ok)
return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo)
try:
value = convert_reso(self.value, self._creso, creso, round_ok=round_ok)
except OverflowError as err:
unit = npy_unit_to_abbrev(creso)
raise OutOfBoundsDatetime(
f"Cannot cast {self} to unit='{unit}' without overflow."
) from err

return type(self)._from_value_and_reso(value, reso=creso, tz=self.tzinfo)

def as_unit(self, str unit, bint round_ok=True):
"""
Expand Down Expand Up @@ -1025,7 +1032,7 @@ cdef class _Timestamp(ABCTimestamp):
--------
>>> ts = pd.Timestamp(2020, 3, 14, 15)
>>> ts.asm8
numpy.datetime64('2020-03-14T15:00:00.000000000')
numpy.datetime64('2020-03-14T15:00:00.000000')
"""
return self.to_datetime64()

Expand Down
7 changes: 7 additions & 0 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,14 @@ def wrapper(
if out_dtype is not None:
out = out.view(out_dtype)
if fill_wrap is not None:
# FIXME: if we get here with dt64/td64 we need to be sure we have
# matching resos
if fill_value.dtype.kind == "m":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worth making a re-usable function for this? I could see this useful in other areas. Something like reso_for_type

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'd like to hold off on that as out of scope

fill_value = fill_value.astype("m8[ns]")
else:
fill_value = fill_value.astype("M8[ns]")
fill_value = fill_wrap(fill_value)

f(arr, indexer, out, fill_value=fill_value)

return wrapper
Expand Down
27 changes: 20 additions & 7 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def _generate_range( # type: ignore[override]
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
else:
xdr = _generate_range(
start=start, end=end, periods=periods, offset=freq
start=start, end=end, periods=periods, offset=freq, unit=unit
)
i8values = np.array([x.value for x in xdr], dtype=np.int64)

Expand Down Expand Up @@ -508,7 +508,10 @@ def _unbox_scalar(self, value) -> np.datetime64:
if not isinstance(value, self._scalar_type) and value is not NaT:
raise ValueError("'value' should be a Timestamp.")
self._check_compatible_with(value)
return value.asm8
if value is NaT:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible I've missed this conversation but do we need to give consideration to a generic NaT type that can hold different precisions? Or are we always going to use numpy's value?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to give consideration to a generic NaT type that can hold different precisions

The closest I've seen to this has been a discussion of having a separate NaT-like for timedelta. I'm not aware of any discussion of a resolution-specific NaT.

return np.datetime64(value.value, self.unit)
else:
return value.as_unit(self.unit).asm8

def _scalar_from_string(self, value) -> Timestamp | NaTType:
return Timestamp(value, tz=self.tz)
Expand Down Expand Up @@ -2474,6 +2477,8 @@ def _generate_range(
end: Timestamp | None,
periods: int | None,
offset: BaseOffset,
*,
unit: str,
):
"""
Generates a sequence of dates corresponding to the specified time
Expand All @@ -2485,7 +2490,8 @@ def _generate_range(
start : Timestamp or None
end : Timestamp or None
periods : int or None
offset : DateOffset,
offset : DateOffset
unit : str

Notes
-----
Expand All @@ -2505,13 +2511,20 @@ def _generate_range(
start = Timestamp(start) # type: ignore[arg-type]
# Non-overlapping identity check (left operand type: "Timestamp", right
# operand type: "NaTType")
start = start if start is not NaT else None # type: ignore[comparison-overlap]
if start is not NaT: # type: ignore[comparison-overlap]
start = start.as_unit(unit)
else:
start = None

# Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
# expected "Union[integer[Any], float, str, date, datetime64]"
end = Timestamp(end) # type: ignore[arg-type]
# Non-overlapping identity check (left operand type: "Timestamp", right
# operand type: "NaTType")
end = end if end is not NaT else None # type: ignore[comparison-overlap]
if end is not NaT: # type: ignore[comparison-overlap]
end = end.as_unit(unit)
else:
end = None

if start and not offset.is_on_offset(start):
# Incompatible types in assignment (expression has type "datetime",
Expand Down Expand Up @@ -2552,7 +2565,7 @@ def _generate_range(
break

# faster than cur + offset
next_date = offset._apply(cur)
next_date = offset._apply(cur).as_unit(unit)
if next_date <= cur:
raise ValueError(f"Offset {offset} did not increment date")
cur = next_date
Expand All @@ -2566,7 +2579,7 @@ def _generate_range(
break

# faster than cur + offset
next_date = offset._apply(cur)
next_date = offset._apply(cur).as_unit(unit)
if next_date >= cur:
raise ValueError(f"Offset {offset} did not decrement date")
cur = next_date
3 changes: 3 additions & 0 deletions pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np

from pandas._libs.tslibs import (
NaT,
Timedelta,
Timestamp,
)
Expand Down Expand Up @@ -216,6 +217,8 @@ def stringify(value):
v = stringify(v)
v = ensure_decoded(v)
v = Timestamp(v)
if v is not NaT:
v = v.as_unit("ns") # pyright: ignore[reportGeneralTypeIssues]
if v.tz is not None:
v = v.tz_convert("UTC")
return TermValue(v, v.value, kind)
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,16 +754,21 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
elif isinstance(val, (np.datetime64, dt.datetime)):
try:
val = Timestamp(val)
# error: Non-overlapping identity check (left operand type:
# "Timestamp", right operand type: "NaTType")
if val is not NaT: # type: ignore[comparison-overlap]
val = val.as_unit("ns")
except OutOfBoundsDatetime:
return _dtype_obj, val

# error: Non-overlapping identity check (left operand type: "Timestamp",
# right operand type: "NaTType")
if val is NaT or val.tz is None: # type: ignore[comparison-overlap]
dtype = np.dtype("M8[ns]")
val = val.to_datetime64()
dtype = val.dtype
# TODO(2.0): this should be dtype = val.dtype
# to get the correct M8 resolution
# TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
else:
if pandas_dtype:
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def parse_dates_safe(
d = {}
if is_datetime64_dtype(dates.dtype):
if delta:
time_delta = dates - stata_epoch
time_delta = dates - Timestamp(stata_epoch).as_unit("ns")
d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds
if days or year:
date_index = DatetimeIndex(dates)
Expand Down
Loading