Skip to content

Commit a411af6

Browse files
committed
API: to_datetime now requires a unit for numerical input
All numerical types now need a unit in the to_datetime call. * Update tests * Add bool verification in tslib.pyx * Fix a bug with bool input and unit in ['ms', 'us', 's', 'D']: to_datetime(True, unit='ms') ran without raising an error, and to_datetime(True, unit='ms', errors='ignore') returned None Issue: pandas-dev#15836
1 parent d236f31 commit a411af6

File tree

10 files changed

+103
-75
lines changed

10 files changed

+103
-75
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ Other Enhancements
4040
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
4141
- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
4242
- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
43+
- :func:`to_datetime` now requires a ``unit`` when passed a numerical argument (:issue:`15836`)
4344

4445
.. _whatsnew_0210.api_breaking:
4546

pandas/_libs/tslib.pyx

+19-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ from datetime cimport cmp_pandas_datetimestruct
3333
from libc.stdlib cimport free
3434

3535
from util cimport (is_integer_object, is_float_object, is_datetime64_object,
36-
is_timedelta64_object, INT64_MAX)
36+
is_bool_object, is_timedelta64_object, INT64_MAX)
3737
cimport util
3838

3939
from datetime cimport *
@@ -2230,6 +2230,11 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
22302230
m = cast_from_unit(None, unit)
22312231

22322232
if is_raise:
2233+
from pandas.core.dtypes.common import is_bool_dtype
2234+
2235+
if is_bool_dtype(values):
2236+
raise TypeError("{0} is not convertible to datetime"
2237+
.format(values.dtype))
22332238

22342239
# try a quick conversion to i8
22352240
# if we have nulls that are not type-compat
@@ -2265,6 +2270,16 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
22652270
if _checknull_with_nat(val):
22662271
iresult[i] = NPY_NAT
22672272

2273+
elif is_bool_object(val):
2274+
if is_raise:
2275+
raise TypeError(
2276+
"{0} is not convertible to datetime"
2277+
.format(values.dtype)
2278+
)
2279+
elif is_ignore:
2280+
raise AssertionError
2281+
iresult[i] = NPY_NAT
2282+
22682283
elif is_integer_object(val) or is_float_object(val):
22692284

22702285
if val != val or val == NPY_NAT:
@@ -2308,7 +2323,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23082323
else:
23092324

23102325
if is_raise:
2311-
raise ValueError("non convertible value {0}"
2326+
raise ValueError("non convertible value {0} "
23122327
"with the unit '{1}'".format(
23132328
val,
23142329
unit))
@@ -2332,6 +2347,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
23322347

23332348
if _checknull_with_nat(val):
23342349
oresult[i] = NaT
2350+
elif is_bool_object(val):
2351+
oresult[i] = val
23352352
elif is_integer_object(val) or is_float_object(val):
23362353

23372354
if val != val or val == NPY_NAT:

pandas/core/dtypes/cast.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def trans(x): # noqa
154154
if dtype.tz:
155155
# convert to datetime and change timezone
156156
from pandas import to_datetime
157-
result = to_datetime(result).tz_localize('utc')
157+
result = to_datetime(result, unit='ns').tz_localize('utc')
158158
result = result.tz_convert(dtype.tz)
159159

160160
except:
@@ -929,11 +929,11 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
929929
dtype):
930930
try:
931931
if is_datetime64:
932-
value = to_datetime(value, errors=errors)._values
932+
value = to_datetime(value, unit='ns', errors=errors)._values
933933
elif is_datetime64tz:
934934
# input has to be UTC at this point, so just
935935
# localize
936-
value = (to_datetime(value, errors=errors)
936+
value = (to_datetime(value, unit='ns', errors=errors)
937937
.tz_localize('UTC')
938938
.tz_convert(dtype.tz)
939939
)

pandas/core/indexes/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ def __new__(cls, data=None,
277277

278278
dayfirst = kwargs.pop('dayfirst', None)
279279
yearfirst = kwargs.pop('yearfirst', None)
280+
unit = kwargs.pop('unit', None)
280281

281282
freq_infer = False
282283
if not isinstance(freq, DateOffset):
@@ -333,7 +334,7 @@ def __new__(cls, data=None,
333334
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
334335
is_integer_dtype(data)):
335336
data = tools.to_datetime(data, dayfirst=dayfirst,
336-
yearfirst=yearfirst)
337+
unit=unit, yearfirst=yearfirst)
337338

338339
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
339340

pandas/core/tools/datetimes.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
234234
- If True, require an exact format match.
235235
- If False, allow the format to match anywhere in the target string.
236236
237-
unit : string, default 'ns'
237+
unit : string
238238
unit of the arg (D,s,ms,us,ns) denote the unit, which is an
239239
integer or float number. This will be based off the origin.
240240
Example, with unit='ms' and origin='unix' (the default), this
@@ -337,6 +337,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
337337
338338
"""
339339
from pandas.core.indexes.datetimes import DatetimeIndex
340+
from pandas.core.frame import DataFrame
340341

341342
tz = 'utc' if utc else None
342343

@@ -446,8 +447,15 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
446447
except (ValueError, TypeError):
447448
raise e
448449

450+
def check_numerical_arg():
451+
return ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
452+
(is_numeric_dtype(np.asarray(arg)) and np.asarray(arg).size))
453+
449454
if arg is None:
450455
return None
456+
elif ((not isinstance(arg, DataFrame)) and
457+
(check_numerical_arg() and unit is None and format is None)):
458+
raise ValueError("a unit is required in case of numerical arg")
451459

452460
# handle origin
453461
if origin == 'julian':
@@ -474,8 +482,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
474482

475483
# arg must be a numeric
476484
original = arg
477-
if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
478-
is_numeric_dtype(np.asarray(arg))):
485+
if not check_numerical_arg():
479486
raise ValueError(
480487
"'{arg}' is not compatible with origin='{origin}'; "
481488
"it must be numeric with a unit specified ".format(

pandas/tests/indexes/datetimes/test_tools.py

+59-57
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,35 @@
2525
compat)
2626

2727

28+
@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns'])
29+
def units(request):
30+
return request.param
31+
32+
33+
@pytest.fixture
34+
def epoch_1960():
35+
# for origin as 1960-01-01
36+
return Timestamp('1960-01-01')
37+
38+
39+
@pytest.fixture
40+
def units_from_epochs():
41+
return list(range(5))
42+
43+
44+
@pytest.fixture(params=[epoch_1960(),
45+
epoch_1960().to_pydatetime(),
46+
epoch_1960().to_datetime64(),
47+
str(epoch_1960())])
48+
def epochs(request):
49+
return request.param
50+
51+
52+
@pytest.fixture
53+
def julian_dates():
54+
return pd.date_range('2014-1-1', periods=10).to_julian_date().values
55+
56+
2857
class TimeConversionFormats(object):
2958

3059
def test_to_datetime_format(self):
@@ -306,25 +335,6 @@ def test_to_datetime_tz_psycopg2(self):
306335
dtype='datetime64[ns, UTC]')
307336
tm.assert_index_equal(result, expected)
308337

309-
def test_datetime_bool(self):
310-
# GH13176
311-
with pytest.raises(TypeError):
312-
to_datetime(False)
313-
assert to_datetime(False, errors="coerce") is NaT
314-
assert to_datetime(False, errors="ignore") is False
315-
with pytest.raises(TypeError):
316-
to_datetime(True)
317-
assert to_datetime(True, errors="coerce") is NaT
318-
assert to_datetime(True, errors="ignore") is True
319-
with pytest.raises(TypeError):
320-
to_datetime([False, datetime.today()])
321-
with pytest.raises(TypeError):
322-
to_datetime(['20130101', True])
323-
tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
324-
errors="coerce"),
325-
DatetimeIndex([to_datetime(0), NaT,
326-
NaT, to_datetime(0)]))
327-
328338
def test_datetime_invalid_datatype(self):
329339
# GH13176
330340

@@ -334,7 +344,28 @@ def test_datetime_invalid_datatype(self):
334344
pd.to_datetime(pd.to_datetime)
335345

336346

337-
class ToDatetimeUnit(object):
347+
class TestToDatetimeUnit(object):
348+
349+
def test_datetime_bool(self, units):
350+
# GH13176
351+
#import pdb; pdb.set_trace()
352+
with pytest.raises(TypeError):
353+
to_datetime(False, unit=units)
354+
assert to_datetime(False, unit=units, errors="coerce") is NaT
355+
assert to_datetime(False, unit=units, errors="ignore") == False
356+
with pytest.raises(TypeError):
357+
to_datetime(True, unit=units)
358+
assert to_datetime(True, unit=units, errors="coerce") is NaT
359+
assert to_datetime(True, unit=units, errors="ignore") == True
360+
with pytest.raises(TypeError):
361+
to_datetime([False, datetime.today()], unit=units)
362+
with pytest.raises(TypeError):
363+
to_datetime([True, '20130101'], unit=units)
364+
365+
tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
366+
errors="coerce"),
367+
DatetimeIndex([to_datetime(0, unit=units), NaT,
368+
NaT, to_datetime(0, unit=units)]))
338369

339370
def test_unit(self):
340371
# GH 11758
@@ -409,10 +440,10 @@ def test_unit_with_numeric(self):
409440
arr1 = [1.434692e+18, 1.432766e+18]
410441
arr2 = np.array(arr1).astype('int64')
411442
for errors in ['ignore', 'raise', 'coerce']:
412-
result = pd.to_datetime(arr1, errors=errors)
443+
result = pd.to_datetime(arr1, unit='ns', errors=errors)
413444
tm.assert_index_equal(result, expected)
414445

415-
result = pd.to_datetime(arr2, errors=errors)
446+
result = pd.to_datetime(arr2, unit='ns', errors=errors)
416447
tm.assert_index_equal(result, expected)
417448

418449
# but we want to make sure that we are coercing
@@ -421,37 +452,37 @@ def test_unit_with_numeric(self):
421452
'2015-06-19 05:33:20',
422453
'2015-05-27 22:33:20'])
423454
arr = ['foo', 1.434692e+18, 1.432766e+18]
424-
result = pd.to_datetime(arr, errors='coerce')
455+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
425456
tm.assert_index_equal(result, expected)
426457

427458
expected = DatetimeIndex(['2015-06-19 05:33:20',
428459
'2015-05-27 22:33:20',
429460
'NaT',
430461
'NaT'])
431462
arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
432-
result = pd.to_datetime(arr, errors='coerce')
463+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
433464
tm.assert_index_equal(result, expected)
434465

435466
def test_unit_mixed(self):
436467

437468
# mixed integers/datetimes
438469
expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT'])
439470
arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18]
440-
result = pd.to_datetime(arr, errors='coerce')
471+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
441472
tm.assert_index_equal(result, expected)
442473

443474
with pytest.raises(ValueError):
444-
pd.to_datetime(arr, errors='raise')
475+
pd.to_datetime(arr, unit='ns', errors='raise')
445476

446477
expected = DatetimeIndex(['NaT',
447478
'NaT',
448479
'2013-01-01'])
449480
arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
450-
result = pd.to_datetime(arr, errors='coerce')
481+
result = pd.to_datetime(arr, unit='ns', errors='coerce')
451482
tm.assert_index_equal(result, expected)
452483

453484
with pytest.raises(ValueError):
454-
pd.to_datetime(arr, errors='raise')
485+
pd.to_datetime(arr, unit='ns', errors='raise')
455486

456487
def test_dataframe(self):
457488

@@ -1488,35 +1519,6 @@ def test_normalize_date():
14881519
assert (result == datetime(2012, 9, 7))
14891520

14901521

1491-
@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns'])
1492-
def units(request):
1493-
return request.param
1494-
1495-
1496-
@pytest.fixture
1497-
def epoch_1960():
1498-
# for origin as 1960-01-01
1499-
return Timestamp('1960-01-01')
1500-
1501-
1502-
@pytest.fixture
1503-
def units_from_epochs():
1504-
return list(range(5))
1505-
1506-
1507-
@pytest.fixture(params=[epoch_1960(),
1508-
epoch_1960().to_pydatetime(),
1509-
epoch_1960().to_datetime64(),
1510-
str(epoch_1960())])
1511-
def epochs(request):
1512-
return request.param
1513-
1514-
1515-
@pytest.fixture
1516-
def julian_dates():
1517-
return pd.date_range('2014-1-1', periods=10).to_julian_date().values
1518-
1519-
15201522
class TestOrigin(object):
15211523

15221524
def test_to_basic(self, julian_dates):

pandas/tests/io/json/test_json_table_schema.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -349,15 +349,15 @@ def test_make_field_float(self):
349349

350350
def test_make_field_datetime(self):
351351
data = [1., 2., 3.]
352-
kinds = [pd.Series(pd.to_datetime(data), name='values'),
353-
pd.to_datetime(data)]
352+
kinds = [pd.Series(pd.to_datetime(data, unit='ns'), name='values'),
353+
pd.to_datetime(data, unit='ns')]
354354
for kind in kinds:
355355
result = make_field(kind)
356356
expected = {"name": "values", "type": 'datetime'}
357357
assert result == expected
358358

359-
kinds = [pd.Series(pd.to_datetime(data, utc=True), name='values'),
360-
pd.to_datetime(data, utc=True)]
359+
kinds = [pd.Series(pd.to_datetime(data, unit='ns', utc=True), name='values'),
360+
pd.to_datetime(data, unit='ns', utc=True)]
361361
for kind in kinds:
362362
result = make_field(kind)
363363
expected = {"name": "values", "type": 'datetime', "tz": "UTC"}

pandas/tests/io/json/test_pandas.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def _check_orient(df, orient, dtype=None, numpy=False,
181181

182182
if not convert_axes and df.index.dtype.type == np.datetime64:
183183
unser.index = DatetimeIndex(
184-
unser.index.values.astype('i8') * 1e6)
184+
unser.index.values.astype('i8') * 1e6, unit='ns')
185185
if orient == "records":
186186
# index is not captured in this orientation
187187
tm.assert_almost_equal(df.values, unser.values,
@@ -832,7 +832,7 @@ def test_timedelta(self):
832832

833833
result = pd.read_json(frame.to_json(date_unit='ns'))
834834
result['a'] = pd.to_timedelta(result.a, unit='ns')
835-
result['c'] = pd.to_datetime(result.c)
835+
result['c'] = pd.to_datetime(result.c, unit='ns')
836836
assert_frame_equal(frame, result)
837837

838838
def test_mixed_timedelta_datetime(self):

pandas/tests/test_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0):
9898
assert m is not None, "incompatible typestr -> {0}".format(typestr)
9999
tz = m.groups()[0]
100100
assert num_items == 1, "must have only 1 num items for a tz-aware"
101-
values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)
101+
values = DatetimeIndex(np.arange(N) * 1e9, unit='ns', tz=tz)
102102
elif typestr in ('timedelta', 'td', 'm8[ns]'):
103103
values = (mat * 1).astype('m8[ns]')
104104
elif typestr in ('category', ):

pandas/tests/test_resample.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1720,7 +1720,7 @@ def test_nanosecond_resample_error(self):
17201720
start = 1443707890427
17211721
exp_start = 1443707890400
17221722
indx = pd.date_range(
1723-
start=pd.to_datetime(start),
1723+
start=pd.to_datetime(start, unit='ns'),
17241724
periods=10,
17251725
freq='100n'
17261726
)
@@ -1729,7 +1729,7 @@ def test_nanosecond_resample_error(self):
17291729
result = r.agg('mean')
17301730

17311731
exp_indx = pd.date_range(
1732-
start=pd.to_datetime(exp_start),
1732+
start=pd.to_datetime(exp_start, unit='ns'),
17331733
periods=10,
17341734
freq='100n'
17351735
)

0 commit comments

Comments
 (0)