Skip to content

Commit 3a7b1c0

Browse files
committed
API: to_datetime, required unit with numerical
All numerical types now need a unit in to_datetime call. * Update tests * Add bool verification in tslib.pyx * Bug correction with bool and unit in ['ms', 'us', 's', 'D'] * to_datetime(True, unit='ms') did run without error * to_datetime(True, unit='ms', errors='ignore') returned None Issue: pandas-dev#15836
1 parent 77bfe21 commit 3a7b1c0

File tree

10 files changed

+181
-78
lines changed

10 files changed

+181
-78
lines changed

doc/source/whatsnew/v0.21.0.txt

+78-2
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,89 @@ Other Enhancements
131131
- :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
132132
- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and are lazy in all cases (:issue:`13918`, :issue:`17213`)
133133

134-
135-
136134
.. _whatsnew_0210.api_breaking:
137135

138136
Backwards incompatible API changes
139137
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140138

139+
.. _whatsnew_0210.api_breaking.pandas_to_datetime:
140+
141+
Numerical values need an explicit unit in pd.to_datetime
142+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
143+
144+
- :func:`to_datetime` now requires a unit when given a numerical arg (scalar or iterable); if no unit is provided, it raises a ``ValueError`` (:issue:`15836`)
145+
For example:
146+
147+
.. ipython:: python
148+
149+
# Old behaviour:
150+
In [1]: pd.to_datetime(42)
151+
Out[1]: Timestamp('1970-01-01 00:00:00.000000042')
152+
153+
# New behaviour
154+
In [1]: pd.to_datetime(42)
155+
---------------------------------------------------------------------------
156+
ValueError Traceback (most recent call last)
157+
<ipython-input-1-a8ad7fa1924c> in <module>()
158+
----> 1 pd.to_datetime(42)
159+
160+
/home/anthony/src/pandas/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
161+
461 elif ((not isinstance(arg, DataFrame)) and
162+
462 (check_numerical_arg() and unit is None and format is None)):
163+
--> 463 raise ValueError("a unit is required in case of numerical arg")
164+
464
165+
465 # handle origin
166+
167+
ValueError: a unit is required in case of numerical arg
168+
169+
In [2]: pd.to_datetime(42, unit='ns')
170+
Out[2]: Timestamp('1970-01-01 00:00:00.000000042')
171+
172+
Furthermore, this change fixes a bug with boolean values.
173+
174+
.. ipython:: python
175+
# Old behaviour
176+
In [1]: pd.to_datetime(True, unit='ms')
177+
Out[1]: Timestamp('1970-01-01 00:00:00.001000')
178+
179+
# New behaviour
180+
In [2]: pd.to_datetime(True, unit='ms')
181+
---------------------------------------------------------------------------
182+
TypeError Traceback (most recent call last)
183+
<ipython-input-9-d7a95ef3ecc2> in <module>()
184+
----> 1 pd.to_datetime(True, unit='ms')
185+
186+
/home/anthony/src/pandas/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
187+
533 result = _convert_listlike(arg, box, format)
188+
534 else:
189+
--> 535 result = _convert_listlike(np.array([arg]), box, format)[0]
190+
536
191+
537 return result
192+
193+
/home/anthony/src/pandas/pandas/core/tools/datetimes.py in _convert_listlike(arg, box, format, name, tz)
194+
374 arg = getattr(arg, 'values', arg)
195+
375 result = tslib.array_with_unit_to_datetime(arg, unit,
196+
--> 376 errors=errors)
197+
377 if box:
198+
378 if errors == 'ignore':
199+
200+
/home/anthony/src/pandas/pandas/_libs/tslib.pyx in pandas._libs.tslib.array_with_unit_to_datetime()
201+
2210
202+
2211
203+
-> 2212 cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
204+
2213 """
205+
2214 convert the ndarray according to the unit
206+
207+
/home/anthony/src/pandas/pandas/_libs/tslib.pyx in pandas._libs.tslib.array_with_unit_to_datetime()
208+
2246 raise TypeError("{0} is not convertible to datetime"
209+
2247 .format(values.dtype))
210+
-> 2248
211+
2249 # try a quick conversion to i8
212+
2250 # if we have nulls that are not type-compat
213+
214+
TypeError: bool is not convertible to datetime
215+
216+
Boolean values now raise an error every time.
141217

142218
.. _whatsnew_0210.api_breaking.deps:
143219

pandas/_libs/tslib.pyx

+17-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ cdef extern from "Python.h":
3131
from libc.stdlib cimport free
3232

3333
from util cimport (is_integer_object, is_float_object, is_datetime64_object,
34-
is_timedelta64_object, INT64_MAX)
34+
is_bool_object, is_timedelta64_object, INT64_MAX)
3535
cimport util
3636

3737
# this is our datetime.pxd
@@ -2242,6 +2242,9 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
22422242
m = cast_from_unit(None, unit)
22432243

22442244
if is_raise:
2245+
if np.issubdtype(values.dtype, np.bool_):
2246+
raise TypeError("{0} is not convertible to datetime"
2247+
.format(values.dtype))
22452248

22462249
# try a quick conversion to i8
22472250
# if we have nulls that are not type-compat
@@ -2277,6 +2280,16 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
22772280
if _checknull_with_nat(val):
22782281
iresult[i] = NPY_NAT
22792282

2283+
elif is_bool_object(val):
2284+
if is_raise:
2285+
raise TypeError(
2286+
"{0} is not convertible to datetime"
2287+
.format(values.dtype)
2288+
)
2289+
elif is_ignore:
2290+
raise AssertionError
2291+
iresult[i] = NPY_NAT
2292+
22802293
elif is_integer_object(val) or is_float_object(val):
22812294

22822295
if val != val or val == NPY_NAT:
@@ -2320,7 +2333,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23202333
else:
23212334

23222335
if is_raise:
2323-
raise ValueError("non convertible value {0}"
2336+
raise ValueError("non convertible value {0} "
23242337
"with the unit '{1}'".format(
23252338
val,
23262339
unit))
@@ -2344,6 +2357,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23442357

23452358
if _checknull_with_nat(val):
23462359
oresult[i] = NaT
2360+
elif is_bool_object(val):
2361+
oresult[i] = val
23472362
elif is_integer_object(val) or is_float_object(val):
23482363

23492364
if val != val or val == NPY_NAT:

pandas/core/dtypes/cast.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def trans(x): # noqa
155155
if dtype.tz:
156156
# convert to datetime and change timezone
157157
from pandas import to_datetime
158-
result = to_datetime(result).tz_localize('utc')
158+
result = to_datetime(result, unit='ns').tz_localize('utc')
159159
result = result.tz_convert(dtype.tz)
160160

161161
except:
@@ -963,11 +963,13 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
963963
dtype):
964964
try:
965965
if is_datetime64:
966-
value = to_datetime(value, errors=errors)._values
966+
value = to_datetime(value, unit='ns',
967+
errors=errors)._values
967968
elif is_datetime64tz:
968969
# input has to be UTC at this point, so just
969970
# localize
970-
value = (to_datetime(value, errors=errors)
971+
value = (to_datetime(value, unit='ns',
972+
errors=errors)
971973
.tz_localize('UTC')
972974
.tz_convert(dtype.tz)
973975
)

pandas/core/indexes/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ def __new__(cls, data=None,
277277

278278
dayfirst = kwargs.pop('dayfirst', None)
279279
yearfirst = kwargs.pop('yearfirst', None)
280+
unit = kwargs.pop('unit', None)
280281

281282
freq_infer = False
282283
if not isinstance(freq, DateOffset):
@@ -333,7 +334,7 @@ def __new__(cls, data=None,
333334
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
334335
is_integer_dtype(data)):
335336
data = tools.to_datetime(data, dayfirst=dayfirst,
336-
yearfirst=yearfirst)
337+
unit=unit, yearfirst=yearfirst)
337338

338339
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
339340

pandas/core/tools/datetimes.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
235235
- If True, require an exact format match.
236236
- If False, allow the format to match anywhere in the target string.
237237
238-
unit : string, default 'ns'
238+
unit : string, default None
239239
unit of the arg (D, s, ms, us, ns) when the arg is an
240240
integer or float number. This will be based off the origin.
241241
Example, with unit='ms' and origin='unix' (the default), this
@@ -342,6 +342,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
342342
pandas.to_timedelta : Convert argument to timedelta.
343343
"""
344344
from pandas.core.indexes.datetimes import DatetimeIndex
345+
from pandas.core.frame import DataFrame
345346

346347
tz = 'utc' if utc else None
347348

@@ -451,8 +452,15 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
451452
except (ValueError, TypeError):
452453
raise e
453454

455+
def check_numerical_arg():
456+
return ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
457+
(is_numeric_dtype(np.asarray(arg)) and np.asarray(arg).size))
458+
454459
if arg is None:
455460
return None
461+
elif ((not isinstance(arg, DataFrame)) and
462+
(check_numerical_arg() and unit is None and format is None)):
463+
raise ValueError("a unit is required in case of numerical arg")
456464

457465
# handle origin
458466
if origin == 'julian':
@@ -479,8 +487,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
479487

480488
# arg must be a numeric
481489
original = arg
482-
if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
483-
is_numeric_dtype(np.asarray(arg))):
490+
if not check_numerical_arg():
484491
raise ValueError(
485492
"'{arg}' is not compatible with origin='{origin}'; "
486493
"it must be numeric with a unit specified ".format(
@@ -605,7 +612,7 @@ def f(value):
605612
if len(excess):
606613
raise ValueError("extra keys have been passed "
607614
"to the datetime assemblage: "
608-
"[{excess}]".format(','.join(excess=excess)))
615+
"[{}]".format(','.join(excess)))
609616

610617
def coerce(values):
611618
# we allow coercion to if errors allows

0 commit comments

Comments
 (0)