Skip to content

Commit 0520b70

Browse files
committed
API: to_datetime, required unit with numerical (#15836)
* Tests
  * add test_to_datetime_numerical_input
  * add fixtures
  * update bool tests
* Check argument
  * for numerical type
  * DataFrame with unit
1 parent 8d122e6 commit 0520b70

File tree

3 files changed: +101 additions, -59 deletions

pandas/_libs/tslib.pyx

+5 -1
Original file line number | Diff line number | Diff line change
@@ -2305,10 +2305,14 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23052305
raise AssertionError
23062306
iresult[i] = NPY_NAT
23072307

2308+
elif util.is_bool_object(val):
2309+
raise TypeError("{0} is not convertible to datetime"
2310+
.format(type(val)))
2311+
23082312
else:
23092313

23102314
if is_raise:
2311-
raise ValueError("non convertible value {0}"
2315+
raise ValueError("non convertible value {0} "
23122316
"with the unit '{1}'".format(
23132317
val,
23142318
unit))

pandas/core/tools/datetimes.py

+19 -3
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@
1414
is_float,
1515
is_list_like,
1616
is_scalar,
17-
is_numeric_dtype)
17+
is_numeric_dtype,
18+
is_bool_dtype)
1819
from pandas.core.dtypes.generic import (
1920
ABCIndexClass, ABCSeries,
2021
ABCDataFrame)
@@ -337,6 +338,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
337338
338339
"""
339340
from pandas.core.indexes.datetimes import DatetimeIndex
341+
from pandas import DataFrame
340342

341343
tz = 'utc' if utc else None
342344

@@ -446,9 +448,24 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
446448
except (ValueError, TypeError):
447449
raise e
448450

451+
def check_numerical_arg():
452+
return ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
453+
is_numeric_dtype(np.asarray(arg)))
454+
449455
if arg is None:
450456
return None
451457

458+
if unit is not None:
459+
if isinstance(arg, (DataFrame,)):
460+
raise ValueError("unit must be None if arg is a DataFrame")
461+
if is_bool_dtype(np.asarray(arg)):
462+
raise TypeError("{0} is not convertible to datetime"
463+
.format(type(arg)))
464+
else:
465+
if (format is None and check_numerical_arg() and
466+
not isinstance(arg, (DataFrame,))):
467+
raise ValueError("a unit is required in case of numerical arg")
468+
452469
# handle origin
453470
if origin == 'julian':
454471

@@ -474,8 +491,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
474491

475492
# arg must be a numeric
476493
original = arg
477-
if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
478-
is_numeric_dtype(np.asarray(arg))):
494+
if not check_numerical_arg():
479495
raise ValueError(
480496
"'{arg}' is not compatible with origin='{origin}'; "
481497
"it must be numeric with a unit specified ".format(

pandas/tests/indexes/datetimes/test_tools.py

+77 -55
Original file line number | Diff line number | Diff line change
@@ -307,24 +307,6 @@ def test_to_datetime_tz_psycopg2(self):
307307
dtype='datetime64[ns, UTC]')
308308
tm.assert_index_equal(result, expected)
309309

310-
def test_datetime_bool(self):
311-
# GH13176
312-
with pytest.raises(TypeError):
313-
to_datetime(False)
314-
self.assertTrue(to_datetime(False, errors="coerce") is NaT)
315-
self.assertEqual(to_datetime(False, errors="ignore"), False)
316-
with pytest.raises(TypeError):
317-
to_datetime(True)
318-
self.assertTrue(to_datetime(True, errors="coerce") is NaT)
319-
self.assertEqual(to_datetime(True, errors="ignore"), True)
320-
with pytest.raises(TypeError):
321-
to_datetime([False, datetime.today()])
322-
with pytest.raises(TypeError):
323-
to_datetime(['20130101', True])
324-
tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
325-
errors="coerce"),
326-
DatetimeIndex([to_datetime(0), NaT,
327-
NaT, to_datetime(0)]))
328310

329311
def test_datetime_invalid_datatype(self):
330312
# GH13176
@@ -410,10 +392,10 @@ def test_unit_with_numeric(self):
410392
arr1 = [1.434692e+18, 1.432766e+18]
411393
arr2 = np.array(arr1).astype('int64')
412394
for errors in ['ignore', 'raise', 'coerce']:
413-
result = pd.to_datetime(arr1, errors=errors)
395+
result = pd.to_datetime(arr1, unit='ns', errors=errors)
414396
tm.assert_index_equal(result, expected)
415397

416-
result = pd.to_datetime(arr2, errors=errors)
398+
result = pd.to_datetime(arr2, unit='ns', errors=errors)
417399
tm.assert_index_equal(result, expected)
418400

419401
# but we want to make sure that we are coercing
@@ -422,15 +404,15 @@ def test_unit_with_numeric(self):
422404
'2015-06-19 05:33:20',
423405
'2015-05-27 22:33:20'])
424406
arr = ['foo', 1.434692e+18, 1.432766e+18]
425-
result = pd.to_datetime(arr, errors='coerce')
407+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
426408
tm.assert_index_equal(result, expected)
427409

428410
expected = DatetimeIndex(['2015-06-19 05:33:20',
429411
'2015-05-27 22:33:20',
430412
'NaT',
431413
'NaT'])
432414
arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
433-
result = pd.to_datetime(arr, errors='coerce')
415+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
434416
tm.assert_index_equal(result, expected)
435417

436418
def test_unit_mixed(self):
@@ -442,17 +424,17 @@ def test_unit_mixed(self):
442424
tm.assert_index_equal(result, expected)
443425

444426
with pytest.raises(ValueError):
445-
pd.to_datetime(arr, errors='raise')
427+
pd.to_datetime(arr, unit='ns', errors='raise')
446428

447429
expected = DatetimeIndex(['NaT',
448430
'NaT',
449431
'2013-01-01'])
450432
arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
451-
result = pd.to_datetime(arr, errors='coerce')
433+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
452434
tm.assert_index_equal(result, expected)
453435

454436
with pytest.raises(ValueError):
455-
pd.to_datetime(arr, errors='raise')
437+
pd.to_datetime(arr, unit='ns', errors='raise')
456438

457439
def test_dataframe(self):
458440

@@ -677,36 +659,6 @@ def test_to_datetime_with_apply(self):
677659
lambda x: pd.to_datetime(x, format='%b %y', errors='coerce'))
678660
assert_series_equal(result, expected)
679661

680-
def test_to_datetime_types(self):
681-
682-
# empty string
683-
result = to_datetime('')
684-
assert result is NaT
685-
686-
result = to_datetime(['', ''])
687-
self.assertTrue(isnull(result).all())
688-
689-
# ints
690-
result = Timestamp(0)
691-
expected = to_datetime(0)
692-
self.assertEqual(result, expected)
693-
694-
# GH 3888 (strings)
695-
expected = to_datetime(['2012'])[0]
696-
result = to_datetime('2012')
697-
self.assertEqual(result, expected)
698-
699-
# array = ['2012','20120101','20120101 12:01:01']
700-
array = ['20120101', '20120101 12:01:01']
701-
expected = list(to_datetime(array))
702-
result = lmap(Timestamp, array)
703-
tm.assert_almost_equal(result, expected)
704-
705-
# currently fails ###
706-
# result = Timestamp('2012')
707-
# expected = to_datetime('2012')
708-
# self.assertEqual(result, expected)
709-
710662
def test_to_datetime_unprocessable_input(self):
711663
# GH 4928
712664
tm.assert_numpy_array_equal(
@@ -1545,6 +1497,16 @@ def julian_dates():
15451497
return pd.date_range('2014-1-1', periods=10).to_julian_date().values
15461498

15471499

1500+
@pytest.fixture(params=[True, False])
1501+
def bool_values(request):
1502+
return request.param
1503+
1504+
1505+
@pytest.fixture(params=['coerce', 'ignore'])
1506+
def errors_values(request):
1507+
return request.param
1508+
1509+
15481510
class TestOrigin(object):
15491511

15501512
def test_to_basic(self, julian_dates):
@@ -1632,3 +1594,63 @@ def test_processing_order(self):
16321594
result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01')
16331595
expected = Timestamp('2169-10-20 00:00:00')
16341596
assert result == expected
1597+
1598+
def test_datetime_dataframe_with_unit(self, units):
1599+
df = DataFrame({'year': [2000, 2001],
1600+
'month': [1.5, 1],
1601+
'day': [1, 1]})
1602+
with pytest.raises(ValueError):
1603+
to_datetime(df, unit=units)
1604+
1605+
def test_datetime_bool(self, units, bool_values, errors_values):
1606+
# GH13176
1607+
with pytest.raises(TypeError):
1608+
to_datetime(bool_values, unit=units)
1609+
with pytest.raises(TypeError):
1610+
to_datetime(bool_values, unit=units, errors=errors_values)
1611+
with pytest.raises(TypeError):
1612+
to_datetime([bool_values, datetime.today()], unit=units)
1613+
with pytest.raises(TypeError):
1614+
to_datetime(['20130101', bool_values])
1615+
1616+
tm.assert_index_equal(to_datetime([0, bool_values, NaT, 0.0],
1617+
errors="coerce"),
1618+
DatetimeIndex([to_datetime(0, unit=units), NaT,
1619+
NaT, to_datetime(0, unit=units)]))
1620+
1621+
def test_to_datetime_types(self, units):
1622+
1623+
# empty string
1624+
result = to_datetime('')
1625+
assert result is NaT
1626+
1627+
result = to_datetime(['', ''])
1628+
assert(isnull(result).all())
1629+
1630+
# ints
1631+
result = Timestamp(0)
1632+
expected = to_datetime(0, unit=units)
1633+
assert result == expected
1634+
1635+
# GH 3888 (strings)
1636+
expected = to_datetime(['2012'])[0]
1637+
result = to_datetime('2012')
1638+
assert result == expected
1639+
1640+
# array = ['2012','20120101','20120101 12:01:01']
1641+
array = ['20120101', '20120101 12:01:01']
1642+
expected = list(to_datetime(array))
1643+
result = lmap(Timestamp, array)
1644+
tm.assert_almost_equal(result, expected)
1645+
1646+
# currently fails ###
1647+
# result = Timestamp('2012')
1648+
# expected = to_datetime('2012')
1649+
# assert result == expected
1650+
1651+
@pytest.mark.parametrize('arg', [int(1), float(1), range(5),
1652+
np.array(range(5), 'd')])
1653+
def test_to_datetime_numerical_input(self, arg):
1654+
# GH15836
1655+
with pytest.raises(ValueError):
1656+
pd.to_datetime(arg)

0 commit comments

Comments
 (0)