Skip to content

Commit e569a0f

Browse files
committed
[WiP] API: to_datetime, required unit with numerical (pandas-dev#15836)
* Tests
* add test_to_datetime_numerical_input
* add fixtures
* update bool tests
* Check argument
* for numerical type
* DataFrame with unit
1 parent 0ea0f25 commit e569a0f

File tree

5 files changed

+104
-63
lines changed

5 files changed

+104
-63
lines changed

pandas/_libs/tslib.pyx

+5-1
Original file line numberDiff line numberDiff line change
@@ -2305,10 +2305,14 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23052305
raise AssertionError
23062306
iresult[i] = NPY_NAT
23072307

2308+
elif util.is_bool_object(val):
2309+
raise TypeError("{0} is not convertible to datetime"
2310+
.format(type(val)))
2311+
23082312
else:
23092313

23102314
if is_raise:
2311-
raise ValueError("non convertible value {0}"
2315+
raise ValueError("non convertible value {0} "
23122316
"with the unit '{1}'".format(
23132317
val,
23142318
unit))

pandas/core/indexes/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def __new__(cls, data=None,
332332
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
333333
is_integer_dtype(data)):
334334
data = tools.to_datetime(data, dayfirst=dayfirst,
335-
yearfirst=yearfirst)
335+
unit='ns', yearfirst=yearfirst)
336336

337337
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
338338

pandas/core/tools/datetimes.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
is_float,
1515
is_list_like,
1616
is_scalar,
17-
is_numeric_dtype)
17+
is_numeric_dtype,
18+
is_bool_dtype)
1819
from pandas.core.dtypes.generic import (
1920
ABCIndexClass, ABCSeries,
2021
ABCDataFrame)
@@ -337,6 +338,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
337338
338339
"""
339340
from pandas.core.indexes.datetimes import DatetimeIndex
341+
from pandas import DataFrame
340342

341343
tz = 'utc' if utc else None
342344

@@ -446,9 +448,24 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
446448
except (ValueError, TypeError):
447449
raise e
448450

451+
def check_numerical_arg():
452+
return ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
453+
is_numeric_dtype(np.asarray(arg)))
454+
449455
if arg is None:
450456
return None
451457

458+
if unit is not None:
459+
if isinstance(arg, (DataFrame,)):
460+
raise ValueError("unit must be None if arg is a DataFrame")
461+
if is_bool_dtype(np.asarray(arg)):
462+
raise TypeError("{0} is not convertible to datetime"
463+
.format(type(arg)))
464+
else:
465+
if (format is None and check_numerical_arg() and
466+
not isinstance(arg, (DataFrame,)) and np.asarray(arg).size):
467+
raise ValueError("a unit is required in case of numerical arg")
468+
452469
# handle origin
453470
if origin == 'julian':
454471

@@ -474,8 +491,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
474491

475492
# arg must be a numeric
476493
original = arg
477-
if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
478-
is_numeric_dtype(np.asarray(arg))):
494+
if not check_numerical_arg():
479495
raise ValueError(
480496
"'{arg}' is not compatible with origin='{origin}'; "
481497
"it must be numeric with a unit specified ".format(

pandas/tests/indexes/datetimes/test_tools.py

+77-56
Original file line numberDiff line numberDiff line change
@@ -306,25 +306,6 @@ def test_to_datetime_tz_psycopg2(self):
306306
dtype='datetime64[ns, UTC]')
307307
tm.assert_index_equal(result, expected)
308308

309-
def test_datetime_bool(self):
310-
# GH13176
311-
with pytest.raises(TypeError):
312-
to_datetime(False)
313-
assert to_datetime(False, errors="coerce") is NaT
314-
assert to_datetime(False, errors="ignore") is False
315-
with pytest.raises(TypeError):
316-
to_datetime(True)
317-
assert to_datetime(True, errors="coerce") is NaT
318-
assert to_datetime(True, errors="ignore") is True
319-
with pytest.raises(TypeError):
320-
to_datetime([False, datetime.today()])
321-
with pytest.raises(TypeError):
322-
to_datetime(['20130101', True])
323-
tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
324-
errors="coerce"),
325-
DatetimeIndex([to_datetime(0), NaT,
326-
NaT, to_datetime(0)]))
327-
328309
def test_datetime_invalid_datatype(self):
329310
# GH13176
330311

@@ -409,10 +390,10 @@ def test_unit_with_numeric(self):
409390
arr1 = [1.434692e+18, 1.432766e+18]
410391
arr2 = np.array(arr1).astype('int64')
411392
for errors in ['ignore', 'raise', 'coerce']:
412-
result = pd.to_datetime(arr1, errors=errors)
393+
result = pd.to_datetime(arr1, unit='ns', errors=errors)
413394
tm.assert_index_equal(result, expected)
414395

415-
result = pd.to_datetime(arr2, errors=errors)
396+
result = pd.to_datetime(arr2, unit='ns', errors=errors)
416397
tm.assert_index_equal(result, expected)
417398

418399
# but we want to make sure that we are coercing
@@ -421,15 +402,15 @@ def test_unit_with_numeric(self):
421402
'2015-06-19 05:33:20',
422403
'2015-05-27 22:33:20'])
423404
arr = ['foo', 1.434692e+18, 1.432766e+18]
424-
result = pd.to_datetime(arr, errors='coerce')
405+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
425406
tm.assert_index_equal(result, expected)
426407

427408
expected = DatetimeIndex(['2015-06-19 05:33:20',
428409
'2015-05-27 22:33:20',
429410
'NaT',
430411
'NaT'])
431412
arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
432-
result = pd.to_datetime(arr, errors='coerce')
413+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
433414
tm.assert_index_equal(result, expected)
434415

435416
def test_unit_mixed(self):
@@ -441,17 +422,17 @@ def test_unit_mixed(self):
441422
tm.assert_index_equal(result, expected)
442423

443424
with pytest.raises(ValueError):
444-
pd.to_datetime(arr, errors='raise')
425+
pd.to_datetime(arr, unit='ns', errors='raise')
445426

446427
expected = DatetimeIndex(['NaT',
447428
'NaT',
448429
'2013-01-01'])
449430
arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
450-
result = pd.to_datetime(arr, errors='coerce')
431+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
451432
tm.assert_index_equal(result, expected)
452433

453434
with pytest.raises(ValueError):
454-
pd.to_datetime(arr, errors='raise')
435+
pd.to_datetime(arr, unit='ns', errors='raise')
455436

456437
def test_dataframe(self):
457438

@@ -676,36 +657,6 @@ def test_to_datetime_with_apply(self):
676657
lambda x: pd.to_datetime(x, format='%b %y', errors='coerce'))
677658
assert_series_equal(result, expected)
678659

679-
def test_to_datetime_types(self):
680-
681-
# empty string
682-
result = to_datetime('')
683-
assert result is NaT
684-
685-
result = to_datetime(['', ''])
686-
assert isnull(result).all()
687-
688-
# ints
689-
result = Timestamp(0)
690-
expected = to_datetime(0)
691-
assert result == expected
692-
693-
# GH 3888 (strings)
694-
expected = to_datetime(['2012'])[0]
695-
result = to_datetime('2012')
696-
assert result == expected
697-
698-
# array = ['2012','20120101','20120101 12:01:01']
699-
array = ['20120101', '20120101 12:01:01']
700-
expected = list(to_datetime(array))
701-
result = lmap(Timestamp, array)
702-
tm.assert_almost_equal(result, expected)
703-
704-
# currently fails ###
705-
# result = Timestamp('2012')
706-
# expected = to_datetime('2012')
707-
# assert result == expected
708-
709660
def test_to_datetime_unprocessable_input(self):
710661
# GH 4928
711662
tm.assert_numpy_array_equal(
@@ -1517,6 +1468,16 @@ def julian_dates():
15171468
return pd.date_range('2014-1-1', periods=10).to_julian_date().values
15181469

15191470

1471+
@pytest.fixture(params=[True, False])
1472+
def bool_values(request):
1473+
return request.param
1474+
1475+
1476+
@pytest.fixture(params=['coerce', 'ignore'])
1477+
def errors_values(request):
1478+
return request.param
1479+
1480+
15201481
class TestOrigin(object):
15211482

15221483
def test_to_basic(self, julian_dates):
@@ -1604,3 +1565,63 @@ def test_processing_order(self):
16041565
result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01')
16051566
expected = Timestamp('2169-10-20 00:00:00')
16061567
assert result == expected
1568+
1569+
def test_datetime_dataframe_with_unit(self, units):
1570+
df = DataFrame({'year': [2000, 2001],
1571+
'month': [1.5, 1],
1572+
'day': [1, 1]})
1573+
with pytest.raises(ValueError):
1574+
to_datetime(df, unit=units)
1575+
1576+
def test_datetime_bool(self, units, bool_values, errors_values):
1577+
# GH13176
1578+
with pytest.raises(TypeError):
1579+
to_datetime(bool_values, unit=units)
1580+
with pytest.raises(TypeError):
1581+
to_datetime(bool_values, unit=units, errors=errors_values)
1582+
with pytest.raises(TypeError):
1583+
to_datetime([bool_values, datetime.today()], unit=units)
1584+
with pytest.raises(TypeError):
1585+
to_datetime(['20130101', bool_values])
1586+
1587+
tm.assert_index_equal(to_datetime([0, bool_values, NaT, 0.0],
1588+
errors="coerce"),
1589+
DatetimeIndex([to_datetime(0, unit=units), NaT,
1590+
NaT, to_datetime(0, unit=units)]))
1591+
1592+
def test_to_datetime_types(self, units):
1593+
1594+
# empty string
1595+
result = to_datetime('')
1596+
assert result is NaT
1597+
1598+
result = to_datetime(['', ''])
1599+
assert(isnull(result).all())
1600+
1601+
# ints
1602+
result = Timestamp(0)
1603+
expected = to_datetime(0, unit=units)
1604+
assert result == expected
1605+
1606+
# GH 3888 (strings)
1607+
expected = to_datetime(['2012'])[0]
1608+
result = to_datetime('2012')
1609+
assert result == expected
1610+
1611+
# array = ['2012','20120101','20120101 12:01:01']
1612+
array = ['20120101', '20120101 12:01:01']
1613+
expected = list(to_datetime(array))
1614+
result = lmap(Timestamp, array)
1615+
tm.assert_almost_equal(result, expected)
1616+
1617+
# currently fails ###
1618+
# result = Timestamp('2012')
1619+
# expected = to_datetime('2012')
1620+
# assert result == expected
1621+
1622+
@pytest.mark.parametrize('arg', [int(1), float(1), range(5),
1623+
np.array(range(5), 'd')])
1624+
def test_to_datetime_numerical_input(self, arg):
1625+
# GH15836
1626+
with pytest.raises(ValueError):
1627+
pd.to_datetime(arg)

pandas/tests/test_resample.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1694,7 +1694,7 @@ def test_nanosecond_resample_error(self):
16941694
start = 1443707890427
16951695
exp_start = 1443707890400
16961696
indx = pd.date_range(
1697-
start=pd.to_datetime(start),
1697+
start=pd.to_datetime(start, unit='ns'),
16981698
periods=10,
16991699
freq='100n'
17001700
)
@@ -1703,7 +1703,7 @@ def test_nanosecond_resample_error(self):
17031703
result = r.agg('mean')
17041704

17051705
exp_indx = pd.date_range(
1706-
start=pd.to_datetime(exp_start),
1706+
start=pd.to_datetime(exp_start, unit='ns'),
17071707
periods=10,
17081708
freq='100n'
17091709
)

0 commit comments

Comments
 (0)