-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: pd.to_datetime, unit='s' much slower for float64 than for int64 #35027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 36 commits
bb56553
de81148
9803670
a224e19
a7bb0d1
20162fe
a332e37
41f22fa
0617b2a
a501aa0
9be1567
1030374
ea932a9
9d47f14
a959535
efbd6ba
859b9a5
a4606a0
1597253
28397b0
1888681
ba5d3b5
c6d7746
eb81beb
7f68448
d9fb88f
b2119b7
2c39cd3
64c94fb
dd519da
5e5976d
b69df7a
d37b45c
05fab52
38a533f
b1d8149
a6d8d9e
e2e600b
c7a3b08
c0c31ca
111abb7
59290a0
611dad0
63fa94b
76cd0eb
46f25a4
b308ba7
1aa7bb2
a3f42df
8837ff4
6f9caeb
1ff89d4
8084caf
c238cec
416035b
47c2b5f
f216a43
5f76f48
bb8c35b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,7 @@ from pandas._libs.tslibs.parsing import parse_datetime_string | |
from pandas._libs.tslibs.conversion cimport ( | ||
_TSObject, | ||
cast_from_unit, | ||
precision_from_unit, | ||
convert_datetime_to_tsobject, | ||
get_datetime64_nanos, | ||
) | ||
|
@@ -205,6 +206,7 @@ def array_with_unit_to_datetime( | |
cdef: | ||
Py_ssize_t i, j, n=len(values) | ||
int64_t m | ||
int prec = 0 | ||
ndarray[float64_t] fvalues | ||
bint is_ignore = errors=='ignore' | ||
bint is_coerce = errors=='coerce' | ||
|
@@ -217,38 +219,45 @@ def array_with_unit_to_datetime( | |
|
||
assert is_ignore or is_coerce or is_raise | ||
|
||
if unit == 'ns': | ||
if issubclass(values.dtype.type, np.integer): | ||
result = values.astype('M8[ns]') | ||
if unit == "ns": | ||
if issubclass(values.dtype.type, np.integer) or issubclass( | ||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
values.dtype.type, np.float_ | ||
): | ||
result = values.astype("M8[ns]") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does astype of floats directly to M8 work? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess it does (as you use it below), but do we have a test specifically for float with unit='ns'? Also, you can try .astype(..., copy=False). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Would it be better to do the following, here and below: ivalues = values.view("i8")
result = ivalues.astype("M8[ns]") There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I'll look some more, but I don't think we do. I will add one unless I find an existing test. |
||
else: | ||
result, tz = array_to_datetime(values.astype(object), errors=errors) | ||
return result, tz | ||
|
||
m = cast_from_unit(None, unit) | ||
m, p = precision_from_unit(unit) | ||
|
||
if is_raise: | ||
|
||
# try a quick conversion to i8 | ||
# try a quick conversion to i8/f8 | ||
# if we have nulls that are not type-compat | ||
# then need to iterate | ||
if values.dtype.kind == "i": | ||
# Note: this condition makes the casting="same_kind" redundant | ||
iresult = values.astype('i8', casting='same_kind', copy=False) | ||
# fill by comparing to NPY_NAT constant | ||
|
||
if values.dtype.kind == "i" or values.dtype.kind == "f": | ||
iresult = values.astype("i8", copy=False) | ||
# fill missing values by comparing to NPY_NAT | ||
mask = iresult == NPY_NAT | ||
iresult[mask] = 0 | ||
fvalues = iresult.astype('f8') * m | ||
fvalues = values.astype("f8") * m | ||
need_to_iterate = False | ||
|
||
# check the bounds | ||
if not need_to_iterate: | ||
|
||
if ((fvalues < Timestamp.min.value).any() | ||
or (fvalues > Timestamp.max.value).any()): | ||
# check the bounds | ||
if (fvalues < Timestamp.min.value).any() or ( | ||
fvalues > Timestamp.max.value | ||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
).any(): | ||
raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") | ||
result = (iresult * m).astype('M8[ns]') | ||
iresult = result.view('i8') | ||
|
||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if prec: | ||
fvalues = round(fvalues, prec) | ||
|
||
result = fvalues.astype("M8[ns]") | ||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
iresult = result.view("i8") | ||
iresult[mask] = NPY_NAT | ||
|
||
return result, tz | ||
|
||
result = np.empty(n, dtype='M8[ns]') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think these need to be
def time_foo
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rewrote this.