Skip to content

Commit 904e45c

Browse files
committed
API: to_datetime requires a unit for numerical args
All numerical types now require a unit in the to_datetime call. * Update tests * Add bool verification in tslib.pyx * Fix bug with bool input and unit in ['ms', 'us', 's', 'D'], where previously: * to_datetime(True, unit='ms') ran without raising an error * to_datetime(True, unit='ms', errors='ignore') returned None Issue: pandas-dev#15836
1 parent a9421af commit 904e45c

File tree

10 files changed

+108
-78
lines changed

10 files changed

+108
-78
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ Other Enhancements
4040
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
4141
- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
4242
- :func:`Dataframe.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
43+
- :func:`to_datetime` now requires a ``unit`` when called with a numerical argument (:issue:`15836`)
4344

4445
.. _whatsnew_0210.api_breaking:
4546

pandas/_libs/tslib.pyx

+19-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ from datetime cimport cmp_pandas_datetimestruct
3333
from libc.stdlib cimport free
3434

3535
from util cimport (is_integer_object, is_float_object, is_datetime64_object,
36-
is_timedelta64_object, INT64_MAX)
36+
is_bool_object, is_timedelta64_object, INT64_MAX)
3737
cimport util
3838

3939
from datetime cimport *
@@ -2230,6 +2230,11 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
22302230
m = cast_from_unit(None, unit)
22312231

22322232
if is_raise:
2233+
from pandas.core.dtypes.common import is_bool_dtype
2234+
2235+
if is_bool_dtype(values):
2236+
raise TypeError("{0} is not convertible to datetime"
2237+
.format(values.dtype))
22332238

22342239
# try a quick conversion to i8
22352240
# if we have nulls that are not type-compat
@@ -2265,6 +2270,16 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
22652270
if _checknull_with_nat(val):
22662271
iresult[i] = NPY_NAT
22672272

2273+
elif is_bool_object(val):
2274+
if is_raise:
2275+
raise TypeError(
2276+
"{0} is not convertible to datetime"
2277+
.format(values.dtype)
2278+
)
2279+
elif is_ignore:
2280+
raise AssertionError
2281+
iresult[i] = NPY_NAT
2282+
22682283
elif is_integer_object(val) or is_float_object(val):
22692284

22702285
if val != val or val == NPY_NAT:
@@ -2308,7 +2323,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23082323
else:
23092324

23102325
if is_raise:
2311-
raise ValueError("non convertible value {0}"
2326+
raise ValueError("non convertible value {0} "
23122327
"with the unit '{1}'".format(
23132328
val,
23142329
unit))
@@ -2332,6 +2347,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23322347

23332348
if _checknull_with_nat(val):
23342349
oresult[i] = NaT
2350+
elif is_bool_object(val):
2351+
oresult[i] = val
23352352
elif is_integer_object(val) or is_float_object(val):
23362353

23372354
if val != val or val == NPY_NAT:

pandas/core/dtypes/cast.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def trans(x): # noqa
154154
if dtype.tz:
155155
# convert to datetime and change timezone
156156
from pandas import to_datetime
157-
result = to_datetime(result).tz_localize('utc')
157+
result = to_datetime(result, unit='ns').tz_localize('utc')
158158
result = result.tz_convert(dtype.tz)
159159

160160
except:
@@ -929,14 +929,16 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
929929
dtype):
930930
try:
931931
if is_datetime64:
932-
value = to_datetime(value, errors=errors)._values
932+
value = to_datetime(value, unit='ns',
933+
errors=errors)._values
933934
elif is_datetime64tz:
934935
# input has to be UTC at this point, so just
935936
# localize
936-
value = (to_datetime(value, errors=errors)
937-
.tz_localize('UTC')
938-
.tz_convert(dtype.tz)
939-
)
937+
value = (
938+
to_datetime(value, unit='ns', errors=errors)
939+
.tz_localize('UTC')
940+
.tz_convert(dtype.tz)
941+
)
940942
elif is_timedelta64:
941943
value = to_timedelta(value, errors=errors)._values
942944
except (AttributeError, ValueError, TypeError):

pandas/core/indexes/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ def __new__(cls, data=None,
277277

278278
dayfirst = kwargs.pop('dayfirst', None)
279279
yearfirst = kwargs.pop('yearfirst', None)
280+
unit = kwargs.pop('unit', None)
280281

281282
freq_infer = False
282283
if not isinstance(freq, DateOffset):
@@ -333,7 +334,7 @@ def __new__(cls, data=None,
333334
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
334335
is_integer_dtype(data)):
335336
data = tools.to_datetime(data, dayfirst=dayfirst,
336-
yearfirst=yearfirst)
337+
unit=unit, yearfirst=yearfirst)
337338

338339
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
339340

pandas/core/tools/datetimes.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
234234
- If True, require an exact format match.
235235
- If False, allow the format to match anywhere in the target string.
236236
237-
unit : string, default 'ns'
237+
unit : string
238238
unit of the arg (D,s,ms,us,ns) denote the unit, which is an
239239
integer or float number. This will be based off the origin.
240240
Example, with unit='ms' and origin='unix' (the default), this
@@ -337,6 +337,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
337337
338338
"""
339339
from pandas.core.indexes.datetimes import DatetimeIndex
340+
from pandas.core.frame import DataFrame
340341

341342
tz = 'utc' if utc else None
342343

@@ -446,8 +447,15 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
446447
except (ValueError, TypeError):
447448
raise e
448449

450+
def check_numerical_arg():
451+
return ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
452+
(is_numeric_dtype(np.asarray(arg)) and np.asarray(arg).size))
453+
449454
if arg is None:
450455
return None
456+
elif ((not isinstance(arg, DataFrame)) and
457+
(check_numerical_arg() and unit is None and format is None)):
458+
raise ValueError("a unit is required in case of numerical arg")
451459

452460
# handle origin
453461
if origin == 'julian':
@@ -474,8 +482,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
474482

475483
# arg must be a numeric
476484
original = arg
477-
if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
478-
is_numeric_dtype(np.asarray(arg))):
485+
if not check_numerical_arg():
479486
raise ValueError(
480487
"'{arg}' is not compatible with origin='{origin}'; "
481488
"it must be numeric with a unit specified ".format(

pandas/tests/indexes/datetimes/test_tools.py

+58-57
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,35 @@
2525
compat)
2626

2727

28+
@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns'])
29+
def units(request):
30+
return request.param
31+
32+
33+
@pytest.fixture
34+
def epoch_1960():
35+
# for origin as 1960-01-01
36+
return Timestamp('1960-01-01')
37+
38+
39+
@pytest.fixture
40+
def units_from_epochs():
41+
return list(range(5))
42+
43+
44+
@pytest.fixture(params=[epoch_1960(),
45+
epoch_1960().to_pydatetime(),
46+
epoch_1960().to_datetime64(),
47+
str(epoch_1960())])
48+
def epochs(request):
49+
return request.param
50+
51+
52+
@pytest.fixture
53+
def julian_dates():
54+
return pd.date_range('2014-1-1', periods=10).to_julian_date().values
55+
56+
2857
class TimeConversionFormats(object):
2958

3059
def test_to_datetime_format(self):
@@ -306,25 +335,6 @@ def test_to_datetime_tz_psycopg2(self):
306335
dtype='datetime64[ns, UTC]')
307336
tm.assert_index_equal(result, expected)
308337

309-
def test_datetime_bool(self):
310-
# GH13176
311-
with pytest.raises(TypeError):
312-
to_datetime(False)
313-
assert to_datetime(False, errors="coerce") is NaT
314-
assert to_datetime(False, errors="ignore") is False
315-
with pytest.raises(TypeError):
316-
to_datetime(True)
317-
assert to_datetime(True, errors="coerce") is NaT
318-
assert to_datetime(True, errors="ignore") is True
319-
with pytest.raises(TypeError):
320-
to_datetime([False, datetime.today()])
321-
with pytest.raises(TypeError):
322-
to_datetime(['20130101', True])
323-
tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
324-
errors="coerce"),
325-
DatetimeIndex([to_datetime(0), NaT,
326-
NaT, to_datetime(0)]))
327-
328338
def test_datetime_invalid_datatype(self):
329339
# GH13176
330340

@@ -334,7 +344,27 @@ def test_datetime_invalid_datatype(self):
334344
pd.to_datetime(pd.to_datetime)
335345

336346

337-
class ToDatetimeUnit(object):
347+
class TestToDatetimeUnit(object):
348+
349+
def test_datetime_bool(self, units):
350+
# GH13176
351+
with pytest.raises(TypeError):
352+
to_datetime(False, unit=units)
353+
assert to_datetime(False, unit=units, errors="coerce") is NaT
354+
assert to_datetime(False, unit=units, errors="ignore") == False
355+
with pytest.raises(TypeError):
356+
to_datetime(True, unit=units)
357+
assert to_datetime(True, unit=units, errors="coerce") is NaT
358+
assert to_datetime(True, unit=units, errors="ignore") == True
359+
with pytest.raises(TypeError):
360+
to_datetime([False, datetime.today()], unit=units)
361+
with pytest.raises(TypeError):
362+
to_datetime([True, '20130101'], unit=units)
363+
364+
tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
365+
errors="coerce"),
366+
DatetimeIndex([to_datetime(0, unit=units), NaT,
367+
NaT, to_datetime(0, unit=units)]))
338368

339369
def test_unit(self):
340370
# GH 11758
@@ -409,10 +439,10 @@ def test_unit_with_numeric(self):
409439
arr1 = [1.434692e+18, 1.432766e+18]
410440
arr2 = np.array(arr1).astype('int64')
411441
for errors in ['ignore', 'raise', 'coerce']:
412-
result = pd.to_datetime(arr1, errors=errors)
442+
result = pd.to_datetime(arr1, unit='ns', errors=errors)
413443
tm.assert_index_equal(result, expected)
414444

415-
result = pd.to_datetime(arr2, errors=errors)
445+
result = pd.to_datetime(arr2, unit='ns', errors=errors)
416446
tm.assert_index_equal(result, expected)
417447

418448
# but we want to make sure that we are coercing
@@ -421,37 +451,37 @@ def test_unit_with_numeric(self):
421451
'2015-06-19 05:33:20',
422452
'2015-05-27 22:33:20'])
423453
arr = ['foo', 1.434692e+18, 1.432766e+18]
424-
result = pd.to_datetime(arr, errors='coerce')
454+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
425455
tm.assert_index_equal(result, expected)
426456

427457
expected = DatetimeIndex(['2015-06-19 05:33:20',
428458
'2015-05-27 22:33:20',
429459
'NaT',
430460
'NaT'])
431461
arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
432-
result = pd.to_datetime(arr, errors='coerce')
462+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
433463
tm.assert_index_equal(result, expected)
434464

435465
def test_unit_mixed(self):
436466

437467
# mixed integers/datetimes
438468
expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT'])
439469
arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18]
440-
result = pd.to_datetime(arr, errors='coerce')
470+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
441471
tm.assert_index_equal(result, expected)
442472

443473
with pytest.raises(ValueError):
444-
pd.to_datetime(arr, errors='raise')
474+
pd.to_datetime(arr, unit='ns', errors='raise')
445475

446476
expected = DatetimeIndex(['NaT',
447477
'NaT',
448478
'2013-01-01'])
449479
arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
450-
result = pd.to_datetime(arr, errors='coerce')
480+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
451481
tm.assert_index_equal(result, expected)
452482

453483
with pytest.raises(ValueError):
454-
pd.to_datetime(arr, errors='raise')
484+
pd.to_datetime(arr, unit='ns', errors='raise')
455485

456486
def test_dataframe(self):
457487

@@ -1488,35 +1518,6 @@ def test_normalize_date():
14881518
assert (result == datetime(2012, 9, 7))
14891519

14901520

1491-
@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns'])
1492-
def units(request):
1493-
return request.param
1494-
1495-
1496-
@pytest.fixture
1497-
def epoch_1960():
1498-
# for origin as 1960-01-01
1499-
return Timestamp('1960-01-01')
1500-
1501-
1502-
@pytest.fixture
1503-
def units_from_epochs():
1504-
return list(range(5))
1505-
1506-
1507-
@pytest.fixture(params=[epoch_1960(),
1508-
epoch_1960().to_pydatetime(),
1509-
epoch_1960().to_datetime64(),
1510-
str(epoch_1960())])
1511-
def epochs(request):
1512-
return request.param
1513-
1514-
1515-
@pytest.fixture
1516-
def julian_dates():
1517-
return pd.date_range('2014-1-1', periods=10).to_julian_date().values
1518-
1519-
15201521
class TestOrigin(object):
15211522

15221523
def test_to_basic(self, julian_dates):

pandas/tests/io/json/test_json_table_schema.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -349,15 +349,16 @@ def test_make_field_float(self):
349349

350350
def test_make_field_datetime(self):
351351
data = [1., 2., 3.]
352-
kinds = [pd.Series(pd.to_datetime(data), name='values'),
353-
pd.to_datetime(data)]
352+
kinds = [pd.Series(pd.to_datetime(data, unit='ns'), name='values'),
353+
pd.to_datetime(data, unit='ns')]
354354
for kind in kinds:
355355
result = make_field(kind)
356356
expected = {"name": "values", "type": 'datetime'}
357357
assert result == expected
358358

359-
kinds = [pd.Series(pd.to_datetime(data, utc=True), name='values'),
360-
pd.to_datetime(data, utc=True)]
359+
kinds = [pd.Series(pd.to_datetime(data, unit='ns', utc=True),
360+
name='values'),
361+
pd.to_datetime(data, unit='ns', utc=True)]
361362
for kind in kinds:
362363
result = make_field(kind)
363364
expected = {"name": "values", "type": 'datetime', "tz": "UTC"}

pandas/tests/io/json/test_pandas.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def _check_orient(df, orient, dtype=None, numpy=False,
181181

182182
if not convert_axes and df.index.dtype.type == np.datetime64:
183183
unser.index = DatetimeIndex(
184-
unser.index.values.astype('i8') * 1e6)
184+
unser.index.values.astype('i8') * 1e6, unit='ns')
185185
if orient == "records":
186186
# index is not captured in this orientation
187187
tm.assert_almost_equal(df.values, unser.values,
@@ -832,7 +832,7 @@ def test_timedelta(self):
832832

833833
result = pd.read_json(frame.to_json(date_unit='ns'))
834834
result['a'] = pd.to_timedelta(result.a, unit='ns')
835-
result['c'] = pd.to_datetime(result.c)
835+
result['c'] = pd.to_datetime(result.c, unit='ns')
836836
assert_frame_equal(frame, result)
837837

838838
def test_mixed_timedelta_datetime(self):

pandas/tests/test_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0):
9898
assert m is not None, "incompatible typestr -> {0}".format(typestr)
9999
tz = m.groups()[0]
100100
assert num_items == 1, "must have only 1 num items for a tz-aware"
101-
values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)
101+
values = DatetimeIndex(np.arange(N) * 1e9, unit='ns', tz=tz)
102102
elif typestr in ('timedelta', 'td', 'm8[ns]'):
103103
values = (mat * 1).astype('m8[ns]')
104104
elif typestr in ('category', ):

0 commit comments

Comments
 (0)