Skip to content

Commit 123f2ee

Browse files
committed
BUG: Bug in .to_datetime() when passing integers or floats, no unit and errors=coerce
closes #13180 Author: Jeff Reback <[email protected]> Closes #13183 from jreback/coerce and squashes the following commits: 0076151 [Jeff Reback] BUG: Bug in .to_datetime() when passing integers or floats, no unit and errors=coerce
1 parent fecb2ca commit 123f2ee

File tree

5 files changed

+176
-81
lines changed

5 files changed

+176
-81
lines changed

doc/source/whatsnew/v0.18.2.txt

+13-1
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,26 @@ New Behavior:
7777
type(s.tolist()[0])
7878

7979

80+
.. _whatsnew_0182.api.to_datetime_coerce:
8081

82+
``.to_datetime()`` when coercing
83+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8184

85+
A bug is fixed in ``.to_datetime()`` when passing integers or floats with no ``unit`` and with ``errors='coerce'`` (:issue:`13180`).
86+
Previously, if ``.to_datetime()`` encountered mixed integers/floats and strings, but no datetimes, with ``errors='coerce'``, it would convert all values to ``NaT``.
8287

88+
Previous Behavior:
8389

90+
.. code-block:: ipython
8491

92+
In [2]: pd.to_datetime([1, 'foo'], errors='coerce')
93+
Out[2]: DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns]', freq=None)
8594

95+
This will now convert integers/floats with the default unit of ``ns``.
8696

97+
.. ipython:: python
98+
99+
pd.to_datetime([1, 'foo'], errors='coerce')
87100

88101
.. _whatsnew_0182.api.other:
89102

@@ -139,7 +152,6 @@ Bug Fixes
139152

140153

141154

142-
143155
- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
144156

145157

pandas/tests/series/test_internals.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ def test_convert_objects(self):
103103
with tm.assert_produces_warning(FutureWarning):
104104
result = s.convert_objects(convert_dates='coerce',
105105
convert_numeric=False)
106-
assert_series_equal(result, s)
106+
expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
107+
assert_series_equal(result, expected)
107108

108109
# preserve if non-object
109110
s = Series([1], dtype='float32')
@@ -270,7 +271,7 @@ def test_convert(self):
270271

271272
s = Series(['foo', 'bar', 1, 1.0], dtype='O')
272273
result = s._convert(datetime=True, coerce=True)
273-
expected = Series([lib.NaT] * 4)
274+
expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
274275
assert_series_equal(result, expected)
275276

276277
# preserve if non-object

pandas/tseries/tests/test_timeseries.py

+126-62
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,15 @@ def test_to_datetime_unit(self):
762762
with self.assertRaises(ValueError):
763763
to_datetime([1, 2, 111111111], unit='D')
764764

765+
# coerce we can process
766+
expected = DatetimeIndex([Timestamp('1970-01-02'),
767+
Timestamp('1970-01-03')] + ['NaT'] * 1)
768+
result = to_datetime([1, 2, 'foo'], unit='D', errors='coerce')
769+
tm.assert_index_equal(result, expected)
770+
771+
result = to_datetime([1, 2, 111111111], unit='D', errors='coerce')
772+
tm.assert_index_equal(result, expected)
773+
765774
def test_series_ctor_datetime64(self):
766775
rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
767776
dates = np.asarray(rng)
@@ -2283,6 +2292,123 @@ def test_to_datetime_tz_psycopg2(self):
22832292
dtype='datetime64[ns, UTC]')
22842293
tm.assert_index_equal(result, expected)
22852294

2295+
def test_unit(self):
2296+
# GH 11758
2297+
# test proper behavior with errors
2298+
2299+
with self.assertRaises(ValueError):
2300+
to_datetime([1], unit='D', format='%Y%m%d')
2301+
2302+
values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan,
2303+
'NaT', '']
2304+
result = to_datetime(values, unit='D', errors='ignore')
2305+
expected = Index([11111111, Timestamp('1970-01-02'),
2306+
Timestamp('1970-01-02'), pd.NaT,
2307+
pd.NaT, pd.NaT, pd.NaT, pd.NaT],
2308+
dtype=object)
2309+
tm.assert_index_equal(result, expected)
2310+
2311+
result = to_datetime(values, unit='D', errors='coerce')
2312+
expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02',
2313+
'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
2314+
tm.assert_index_equal(result, expected)
2315+
2316+
with self.assertRaises(tslib.OutOfBoundsDatetime):
2317+
to_datetime(values, unit='D', errors='raise')
2318+
2319+
values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT']
2320+
2321+
result = to_datetime(values, errors='ignore', unit='s')
2322+
expected = Index([1420043460000, pd.NaT, pd.NaT,
2323+
pd.NaT, pd.NaT], dtype=object)
2324+
tm.assert_index_equal(result, expected)
2325+
2326+
result = to_datetime(values, errors='coerce', unit='s')
2327+
expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
2328+
tm.assert_index_equal(result, expected)
2329+
2330+
with self.assertRaises(tslib.OutOfBoundsDatetime):
2331+
to_datetime(values, errors='raise', unit='s')
2332+
2333+
# if we have a string, then we raise a ValueError
2334+
# and NOT an OutOfBoundsDatetime
2335+
for val in ['foo', Timestamp('20130101')]:
2336+
try:
2337+
to_datetime(val, errors='raise', unit='s')
2338+
except tslib.OutOfBoundsDatetime:
2339+
raise AssertionError("incorrect exception raised")
2340+
except ValueError:
2341+
pass
2342+
2343+
def test_unit_consistency(self):
2344+
2345+
# consistency of conversions
2346+
expected = Timestamp('1970-05-09 14:25:11')
2347+
result = pd.to_datetime(11111111, unit='s', errors='raise')
2348+
self.assertEqual(result, expected)
2349+
self.assertIsInstance(result, Timestamp)
2350+
2351+
result = pd.to_datetime(11111111, unit='s', errors='coerce')
2352+
self.assertEqual(result, expected)
2353+
self.assertIsInstance(result, Timestamp)
2354+
2355+
result = pd.to_datetime(11111111, unit='s', errors='ignore')
2356+
self.assertEqual(result, expected)
2357+
self.assertIsInstance(result, Timestamp)
2358+
2359+
def test_unit_with_numeric(self):
2360+
2361+
# GH 13180
2362+
# coercions from floats/ints are ok
2363+
expected = DatetimeIndex(['2015-06-19 05:33:20',
2364+
'2015-05-27 22:33:20'])
2365+
arr1 = [1.434692e+18, 1.432766e+18]
2366+
arr2 = np.array(arr1).astype(int)
2367+
for errors in ['ignore', 'raise', 'coerce']:
2368+
result = pd.to_datetime(arr1, errors=errors)
2369+
tm.assert_index_equal(result, expected)
2370+
2371+
result = pd.to_datetime(arr2, errors=errors)
2372+
tm.assert_index_equal(result, expected)
2373+
2374+
# but we want to make sure that we are coercing
2375+
# if we have ints/strings
2376+
expected = DatetimeIndex(['NaT',
2377+
'2015-06-19 05:33:20',
2378+
'2015-05-27 22:33:20'])
2379+
arr = ['foo', 1.434692e+18, 1.432766e+18]
2380+
result = pd.to_datetime(arr, errors='coerce')
2381+
tm.assert_index_equal(result, expected)
2382+
2383+
expected = DatetimeIndex(['2015-06-19 05:33:20',
2384+
'2015-05-27 22:33:20',
2385+
'NaT',
2386+
'NaT'])
2387+
arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
2388+
result = pd.to_datetime(arr, errors='coerce')
2389+
tm.assert_index_equal(result, expected)
2390+
2391+
def test_unit_mixed(self):
2392+
2393+
# mixed integers/datetimes
2394+
expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT'])
2395+
arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18]
2396+
result = pd.to_datetime(arr, errors='coerce')
2397+
tm.assert_index_equal(result, expected)
2398+
2399+
with self.assertRaises(ValueError):
2400+
pd.to_datetime(arr, errors='raise')
2401+
2402+
expected = DatetimeIndex(['NaT',
2403+
'NaT',
2404+
'2013-01-01'])
2405+
arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
2406+
result = pd.to_datetime(arr, errors='coerce')
2407+
tm.assert_index_equal(result, expected)
2408+
2409+
with self.assertRaises(ValueError):
2410+
pd.to_datetime(arr, errors='raise')
2411+
22862412
def test_index_to_datetime(self):
22872413
idx = Index(['1/1/2000', '1/2/2000', '1/3/2000'])
22882414

@@ -4229,68 +4355,6 @@ def check(val, unit=None, h=1, s=1, us=0):
42294355
result = Timestamp('NaT')
42304356
self.assertIs(result, NaT)
42314357

4232-
def test_unit_errors(self):
4233-
# GH 11758
4234-
# test proper behavior with errors
4235-
4236-
with self.assertRaises(ValueError):
4237-
to_datetime([1], unit='D', format='%Y%m%d')
4238-
4239-
values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan,
4240-
'NaT', '']
4241-
result = to_datetime(values, unit='D', errors='ignore')
4242-
expected = Index([11111111, Timestamp('1970-01-02'),
4243-
Timestamp('1970-01-02'), pd.NaT,
4244-
pd.NaT, pd.NaT, pd.NaT, pd.NaT],
4245-
dtype=object)
4246-
tm.assert_index_equal(result, expected)
4247-
4248-
result = to_datetime(values, unit='D', errors='coerce')
4249-
expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02',
4250-
'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
4251-
tm.assert_index_equal(result, expected)
4252-
4253-
with self.assertRaises(tslib.OutOfBoundsDatetime):
4254-
to_datetime(values, unit='D', errors='raise')
4255-
4256-
values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT']
4257-
4258-
result = to_datetime(values, errors='ignore', unit='s')
4259-
expected = Index([1420043460000, pd.NaT, pd.NaT,
4260-
pd.NaT, pd.NaT], dtype=object)
4261-
tm.assert_index_equal(result, expected)
4262-
4263-
result = to_datetime(values, errors='coerce', unit='s')
4264-
expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
4265-
tm.assert_index_equal(result, expected)
4266-
4267-
with self.assertRaises(tslib.OutOfBoundsDatetime):
4268-
to_datetime(values, errors='raise', unit='s')
4269-
4270-
# if we have a string, then we raise a ValueError
4271-
# and NOT an OutOfBoundsDatetime
4272-
for val in ['foo', Timestamp('20130101')]:
4273-
try:
4274-
to_datetime(val, errors='raise', unit='s')
4275-
except tslib.OutOfBoundsDatetime:
4276-
raise AssertionError("incorrect exception raised")
4277-
except ValueError:
4278-
pass
4279-
4280-
# consistency of conversions
4281-
expected = Timestamp('1970-05-09 14:25:11')
4282-
result = pd.to_datetime(11111111, unit='s', errors='raise')
4283-
self.assertEqual(result, expected)
4284-
self.assertIsInstance(result, Timestamp)
4285-
4286-
result = pd.to_datetime(11111111, unit='s', errors='coerce')
4287-
self.assertEqual(result, expected)
4288-
self.assertIsInstance(result, Timestamp)
4289-
4290-
result = pd.to_datetime(11111111, unit='s', errors='ignore')
4291-
self.assertEqual(result, expected)
4292-
self.assertIsInstance(result, Timestamp)
4293-
42944358
def test_roundtrip(self):
42954359

42964360
# test value to string and back conversions

pandas/tseries/tools.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
221221
- If True, require an exact format match.
222222
- If False, allow the format to match anywhere in the target string.
223223
224-
unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch
224+
unit : string, default 'ns'
225+
unit of the arg (D,s,ms,us,ns) denote the unit in epoch
225226
(e.g. a unix timestamp), which is an integer/float number.
226227
infer_datetime_format : boolean, default False
227228
If True and no `format` is given, attempt to infer the format of the

pandas/tslib.pyx

+32-15
Original file line numberDiff line numberDiff line change
@@ -2084,6 +2084,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
20842084
unit))
20852085
elif is_ignore:
20862086
raise AssertionError
2087+
iresult[i] = NPY_NAT
20872088
except:
20882089
if is_raise:
20892090
raise OutOfBoundsDatetime("cannot convert input {0}"
@@ -2151,7 +2152,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
21512152
ndarray[int64_t] iresult
21522153
ndarray[object] oresult
21532154
pandas_datetimestruct dts
2154-
bint utc_convert = bool(utc), seen_integer=0, seen_datetime=0
2155+
bint utc_convert = bool(utc), seen_integer=0, seen_string=0, seen_datetime=0
21552156
bint is_raise=errors=='raise', is_ignore=errors=='ignore', is_coerce=errors=='coerce'
21562157
_TSObject _ts
21572158
int out_local=0, out_tzoffset=0
@@ -2217,25 +2218,32 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
22172218
continue
22182219
raise
22192220

2220-
# if we are coercing, don't allow integers
2221-
elif is_integer_object(val) and not is_coerce:
2222-
if val == NPY_NAT:
2221+
# these must be ns unit by-definition
2222+
elif is_integer_object(val) or is_float_object(val):
2223+
2224+
if val != val or val == NPY_NAT:
22232225
iresult[i] = NPY_NAT
2224-
else:
2226+
elif is_raise or is_ignore:
22252227
iresult[i] = val
22262228
seen_integer=1
2227-
elif is_float_object(val) and not is_coerce:
2228-
if val != val or val == NPY_NAT:
2229-
iresult[i] = NPY_NAT
22302229
else:
2231-
iresult[i] = <int64_t>val
2232-
seen_integer=1
2230+
# coerce
2231+
# we now need to parse this as if unit='ns'
2232+
# we can ONLY accept integers at this point
2233+
# if we have previously accepted (or in future accept)
2234+
# datetimes/strings, then we must coerce
2235+
seen_integer = 1
2236+
try:
2237+
iresult[i] = cast_from_unit(val, 'ns')
2238+
except:
2239+
iresult[i] = NPY_NAT
22332240
else:
22342241
try:
22352242
if len(val) == 0 or val in _nat_strings:
22362243
iresult[i] = NPY_NAT
22372244
continue
22382245

2246+
seen_string=1
22392247
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
22402248
value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
22412249
if out_local == 1:
@@ -2278,11 +2286,20 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
22782286
continue
22792287
raise
22802288

2281-
# don't allow mixed integers and datetime like
2282-
# higher levels can catch and is_coerce to object, for
2283-
# example
2284-
if seen_integer and seen_datetime:
2285-
raise ValueError("mixed datetimes and integers in passed array")
2289+
if seen_datetime and seen_integer:
2290+
# we have mixed datetimes & integers
2291+
2292+
if is_coerce:
2293+
# coerce all of the integers/floats to NaT, preserve
2294+
# the datetimes and other convertibles
2295+
for i in range(n):
2296+
val = values[i]
2297+
if is_integer_object(val) or is_float_object(val):
2298+
result[i] = NPY_NAT
2299+
elif is_raise:
2300+
raise ValueError("mixed datetimes and integers in passed array")
2301+
else:
2302+
raise TypeError
22862303

22872304
return result
22882305
except OutOfBoundsDatetime:

0 commit comments

Comments
 (0)