-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG/API: to_timedelta unit-argument ignored for string input #34379
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ab5f7ba
7a38ecd
a65a35a
57dea1d
b8cf952
8154859
a90a992
1184b6e
5e28b49
fafe838
3ca0dae
8105f9c
e90a18b
e0a55a8
827aebf
9bff504
b6c7d45
9ff6ade
be93e4b
85f0103
673ad19
eb7d3f8
bb40f77
2e331c1
bada4a7
8c0425a
493a14b
e525ec4
448b68a
bd173a9
d16c4c5
1cfc729
e559f06
b914950
64a4440
4d26474
db41054
2f53004
fb40c94
f5e59ee
e7266da
b6a02df
694abe9
4d86ae1
40b09f4
a9c4675
95302de
661fd70
80a1161
dbe8fae
5a0812c
b8031c2
cd17104
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
from numpy cimport int64_t | ||
|
||
# Exposed for tslib, not intended for outside use. | ||
cpdef parse_timedelta_string(object ts, object specified_unit=*) | ||
cpdef int64_t delta_to_nanoseconds(delta) except? -1 | ||
cdef convert_to_timedelta64(object ts, object unit) | ||
cpdef convert_to_timedelta64(object ts, object unit=*) | ||
cdef bint is_any_td_scalar(object obj) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import collections | ||
import warnings | ||
|
||
import cython | ||
|
||
|
@@ -160,7 +161,7 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: | |
raise TypeError(type(delta)) | ||
|
||
|
||
cdef convert_to_timedelta64(object ts, object unit): | ||
cpdef convert_to_timedelta64(object ts, object unit=None): | ||
""" | ||
Convert an incoming object to a timedelta64 if possible. | ||
Before calling, unit must be standardized to avoid repeated unit conversion | ||
|
@@ -174,6 +175,8 @@ cdef convert_to_timedelta64(object ts, object unit): | |
|
||
Return an ns based int64 | ||
""" | ||
if unit is None: | ||
unit = 'ns' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where is this called from? could we type the argument as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. e.g. in array_to_timedelta64 below we should just do that check once at the top of the function, not in every step |
||
if checknull_with_nat(ts): | ||
return np.timedelta64(NPY_NAT) | ||
elif isinstance(ts, _Timedelta): | ||
|
@@ -218,7 +221,7 @@ cdef convert_to_timedelta64(object ts, object unit): | |
|
||
@cython.boundscheck(False) | ||
@cython.wraparound(False) | ||
def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): | ||
def array_to_timedelta64(object[:] values, unit=None, errors='raise'): | ||
""" | ||
Convert an ndarray to an array of timedeltas. If errors == 'coerce', | ||
coerce non-convertible objects to NaT. Otherwise, raise. | ||
|
@@ -240,13 +243,8 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): | |
# this is where all of the error handling will take place. | ||
try: | ||
for i in range(n): | ||
if values[i] is NaT: | ||
# we allow this check in the fast-path because NaT is a C-object | ||
# so this is an inexpensive check | ||
iresult[i] = NPY_NAT | ||
else: | ||
result[i] = parse_timedelta_string(values[i]) | ||
except (TypeError, ValueError): | ||
result[i] = parse_timedelta_string(values[i], specified_unit=unit) | ||
except: | ||
unit = parse_timedelta_unit(unit) | ||
for i in range(n): | ||
try: | ||
|
@@ -260,7 +258,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): | |
return iresult.base # .base to access underlying np.ndarray | ||
|
||
|
||
cdef inline int64_t parse_timedelta_string(str ts) except? -1: | ||
cpdef inline parse_timedelta_string(object ts, specified_unit=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we really want to avoid weakening the type declarations |
||
""" | ||
Parse a regular format timedelta string. Return an int64_t (in ns) | ||
or raise a ValueError on an invalid parse. | ||
|
@@ -371,6 +369,17 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: | |
have_value = 1 | ||
have_dot = 0 | ||
|
||
# Consider units from outside | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we doing the checking at this low level? this is simply a check on the user interface, e.g. if its a string, then no unit can be specified. see how we do this in to_datetime |
||
if not unit: | ||
if specified_unit: | ||
unit = [specified_unit] | ||
else: | ||
if specified_unit: | ||
raise ValueError( | ||
"units were doubly specified, both as an argument ({}) " | ||
"and inside string ({})".format(specified_unit, unit) | ||
) | ||
|
||
# we had a dot, but we have a fractional | ||
# value since we have an unit | ||
if have_dot and len(unit): | ||
|
@@ -412,14 +421,17 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: | |
else: | ||
raise ValueError("unit abbreviation w/o a number") | ||
|
||
# treat as nanoseconds | ||
# but only if we don't have anything else | ||
# raise if we just have a number without units | ||
else: | ||
if have_value: | ||
raise ValueError("have leftover units") | ||
if len(number): | ||
r = timedelta_from_spec(number, frac, 'ns') | ||
result += timedelta_as_neg(r, neg) | ||
warnings.warn( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. huh? |
||
"number string without units is deprecated and " | ||
"will raise an exception in future versions. Considering as nanoseconds.", | ||
FutureWarning | ||
) | ||
result = timedelta_from_spec(number, frac, 'ns') | ||
|
||
return result | ||
|
||
|
@@ -478,10 +490,12 @@ cpdef inline str parse_timedelta_unit(object unit): | |
------ | ||
ValueError : on non-parseable input | ||
""" | ||
if unit is None: | ||
return "ns" | ||
elif unit == "M": | ||
|
||
# Preserve unit if None, will be cast to nanoseconds | ||
# later on at the proper functions | ||
if unit is None or unit == 'M': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. double quotes |
||
return unit | ||
|
||
try: | ||
return timedelta_abbrevs[unit.lower()] | ||
except (KeyError, AttributeError): | ||
|
@@ -1158,7 +1172,7 @@ class Timedelta(_Timedelta): | |
if len(value) > 0 and value[0] == 'P': | ||
value = parse_iso_format_string(value) | ||
else: | ||
value = parse_timedelta_string(value) | ||
value = parse_timedelta_string(value, specified_unit=unit) | ||
value = np.timedelta64(value) | ||
elif PyDelta_Check(value): | ||
value = convert_to_timedelta64(value, 'ns') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -200,7 +200,7 @@ def stringify(value): | |
v = v.tz_convert("UTC") | ||
return TermValue(v, v.value, kind) | ||
elif kind == "timedelta64" or kind == "timedelta": | ||
v = Timedelta(v, unit="s").value | ||
v = Timedelta(v).value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what type is "v" here? if it is an integer, this is a behavior change |
||
return TermValue(int(v), v, kind) | ||
elif meta == "category": | ||
metadata = extract_array(self.metadata, extract_numpy=True) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ | |
from pandas.core.arrays.timedeltas import sequence_to_td64ns | ||
|
||
|
||
def to_timedelta(arg, unit="ns", errors="raise"): | ||
def to_timedelta(arg, unit=None, box=True, errors="raise"): | ||
""" | ||
Convert argument to timedelta. | ||
|
||
|
@@ -108,7 +108,7 @@ def to_timedelta(arg, unit="ns", errors="raise"): | |
return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) | ||
|
||
|
||
def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): | ||
def _coerce_scalar_to_timedelta_type(r, unit=None, box=True, errors="raise"): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why adding an unused box kwarg? |
||
"""Convert string 'r' to a timedelta object.""" | ||
try: | ||
result = Timedelta(r, unit) | ||
|
@@ -124,7 +124,7 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): | |
return result | ||
|
||
|
||
def _convert_listlike(arg, unit="ns", errors="raise", name=None): | ||
def _convert_listlike(arg, unit=None, box=True, errors="raise", name=None): | ||
"""Convert a list of objects to a timedelta index object.""" | ||
if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): | ||
# This is needed only to ensure that in the case where we end up | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you reference
:class:`Timedelta`
andpandas.to_timedelta
hereThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why are we deprecating this? i would simply remove it as its wrong
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jreback By "we simply DO not allow a unit when the input is a string"? Do you refer to the unit inside the string or the unit parameter? This change behaviors are like this:
to_timedelta("1s")
, used as example in https://pandas.pydata.org/docs/user_guide/timedeltas.html#to-timedeltato_timedelta("1", unit="s")
, fixes bug BUG/API: to_timedelta unit-argument ignored for string input #12136to_timedelta("1s", unit="s")
to_timedelta("1")
I simply merged @Sup3rGeo's PR #23025 without changing any behaviors. Please let me know if you want something to be changed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we spent a long time on the PR but was ultimately not merged.because of the way it was structured. Please re-read those comments. We want to handle this at a much higher level, if you pass a unit then your input must be numeric, there is no other option. I don't think PR needs to be very complicated at all.