Skip to content

Commit b281e65

Browse files
committed
Merge pull request #10674 from jreback/dt_default
API: #10636, changing default of to_datetime to raise, deprecating coerce in favor of errors
2 parents 03be332 + 987b7e7 commit b281e65

16 files changed

+230
-126
lines changed

doc/source/timeseries.rst

+15-3
Original file line numberDiff line numberDiff line change
@@ -197,18 +197,30 @@ or ``format``, use ``to_datetime`` if these are required.
197197
Invalid Data
198198
~~~~~~~~~~~~
199199

200-
Pass ``coerce=True`` to convert invalid data to ``NaT`` (not a time):
200+
.. note::
201+
202+
In version 0.17.0, the default for ``to_datetime`` is now ``errors='raise'``, rather than ``errors='ignore'``. This means
203+
that invalid parsing will raise rather than return the original input as in previous versions.
204+
205+
Pass ``errors='coerce'`` to convert invalid data to ``NaT`` (not a time):
201206

202207
.. ipython:: python
208+
:okexcept:
209+
210+
# this is the default, raise when unparseable
211+
to_datetime(['2009-07-31', 'asd'], errors='raise')
203212
204-
to_datetime(['2009-07-31', 'asd'])
213+
# return the original input when unparseable
214+
to_datetime(['2009-07-31', 'asd'], errors='ignore')
205215
206-
to_datetime(['2009-07-31', 'asd'], coerce=True)
216+
# return NaT for input when unparseable
217+
to_datetime(['2009-07-31', 'asd'], errors='coerce')
207218
208219
209220
Take care, ``to_datetime`` may not act as you expect on mixed data:
210221

211222
.. ipython:: python
223+
:okexcept:
212224
213225
to_datetime([1, '1'])
214226

doc/source/whatsnew/v0.17.0.txt

+43-3
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,11 @@ Other enhancements
6565
- Enable `read_hdf` to be used without specifying a key when the HDF file contains a single dataset (:issue:`10443`)
6666

6767
- ``DatetimeIndex`` can be instantiated using strings containing ``NaT`` (:issue:`7599`)
68-
- The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has been made consistent" (:issue:`7599`)
68+
- The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has been made consistent. (:issue:`7599`)
6969

70-
Prior to v0.17.0, ``Timestamp`` and ``to_datetime`` may parse year-only datetime-string incorrectly using today's date, otherwise ``DatetimeIndex`` uses the beginning of the year.
71-
``Timestamp`` and ``to_datetime`` may raise ``ValueError`` in some types of datetime-string which ``DatetimeIndex`` can parse, such as quarterly string.
70+
Prior to v0.17.0, ``Timestamp`` and ``to_datetime`` could parse a year-only datetime-string incorrectly using today's date, whereas ``DatetimeIndex``
71+
uses the beginning of the year. ``Timestamp`` and ``to_datetime`` may raise ``ValueError`` for some types of datetime-string which ``DatetimeIndex``
72+
can parse, such as a quarterly string.
7273

7374
Previous Behavior
7475

@@ -121,6 +122,45 @@ Backwards incompatible API changes
121122

122123
- Line and kde plot with ``subplots=True`` now uses default colors, not all black. Specify ``color='k'`` to draw all lines in black (:issue:`9894`)
123124

125+
.. _whatsnew_0170.api_breaking.to_datetime:
126+
127+
Changes to to_datetime and to_timedelta
128+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
129+
130+
The default for ``pd.to_datetime`` error handling has changed to ``errors='raise'``. In prior versions it was ``errors='ignore'``.
131+
Furthermore, the ``coerce`` argument has been deprecated in favor of ``errors='coerce'``. This means that invalid parsing will raise rather than return the original
132+
input as in previous versions. (:issue:`10636`)
133+
134+
Previous Behavior:
135+
136+
.. code-block:: python
137+
138+
In [2]: pd.to_datetime(['2009-07-31', 'asd'])
139+
Out[2]: array(['2009-07-31', 'asd'], dtype=object)
140+
141+
New Behavior:
142+
143+
.. ipython:: python
144+
:okexcept:
145+
146+
pd.to_datetime(['2009-07-31', 'asd'])
147+
148+
Of course you can coerce this as well.
149+
150+
.. ipython:: python
151+
152+
pd.to_datetime(['2009-07-31', 'asd'], errors='coerce')
153+
154+
To keep the previous behaviour, you can use ``errors='ignore'``:
155+
156+
.. ipython:: python
157+
:okexcept:
158+
159+
pd.to_datetime(['2009-07-31', 'asd'], errors='ignore')
160+
161+
``pd.to_timedelta`` gained a similar API, of ``errors='raise'|'ignore'|'coerce'``, and the ``coerce`` keyword
162+
has been deprecated in favor of ``errors='coerce'``.
163+
124164
.. _whatsnew_0170.api_breaking.convert_objects:
125165

126166
Changes to convert_objects

pandas/core/common.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1903,9 +1903,9 @@ def _possibly_convert_objects(values,
19031903

19041904
# Immediate return if coerce
19051905
if datetime:
1906-
return pd.to_datetime(values, coerce=True, box=False)
1906+
return pd.to_datetime(values, errors='coerce', box=False)
19071907
elif timedelta:
1908-
return pd.to_timedelta(values, coerce=True, box=False)
1908+
return pd.to_timedelta(values, errors='coerce', box=False)
19091909
elif numeric:
19101910
return lib.maybe_convert_numeric(values, set(), coerce_numeric=True)
19111911

@@ -1958,7 +1958,7 @@ def _possibly_convert_platform(values):
19581958
return values
19591959

19601960

1961-
def _possibly_cast_to_datetime(value, dtype, coerce=False):
1961+
def _possibly_cast_to_datetime(value, dtype, errors='raise'):
19621962
""" try to cast the array/value to a datetimelike dtype, converting float
19631963
nan to iNaT
19641964
"""
@@ -2002,9 +2002,9 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
20022002
elif np.prod(value.shape) and value.dtype != dtype:
20032003
try:
20042004
if is_datetime64:
2005-
value = to_datetime(value, coerce=coerce).values
2005+
value = to_datetime(value, errors=errors).values
20062006
elif is_timedelta64:
2007-
value = to_timedelta(value, coerce=coerce).values
2007+
value = to_timedelta(value, errors=errors).values
20082008
except (AttributeError, ValueError):
20092009
pass
20102010

@@ -2066,7 +2066,7 @@ def _possibly_infer_to_datetimelike(value, convert_dates=False):
20662066
def _try_datetime(v):
20672067
# safe coerce to datetime64
20682068
try:
2069-
return tslib.array_to_datetime(v, raise_=True).reshape(shape)
2069+
return tslib.array_to_datetime(v, errors='raise').reshape(shape)
20702070
except:
20712071
return v
20722072

pandas/core/ops.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,6 @@ def _convert_to_array(self, values, name=None, other=None):
341341
"""converts values to ndarray"""
342342
from pandas.tseries.timedeltas import to_timedelta
343343

344-
coerce = True
345344
if not is_list_like(values):
346345
values = np.array([values])
347346
inferred_type = lib.infer_dtype(values)
@@ -362,7 +361,7 @@ def _convert_to_array(self, values, name=None, other=None):
362361
values = tslib.array_to_datetime(values)
363362
elif inferred_type in ('timedelta', 'timedelta64'):
364363
# have a timedelta, convert to ns here
365-
values = to_timedelta(values, coerce=coerce)
364+
values = to_timedelta(values, errors='coerce')
366365
elif inferred_type == 'integer':
367366
# py3 compat where dtype is 'm' but is an integer
368367
if values.dtype.kind == 'm':
@@ -381,7 +380,7 @@ def _convert_to_array(self, values, name=None, other=None):
381380
"datetime/timedelta operations [{0}]".format(
382381
', '.join([com.pprint_thing(v)
383382
for v in values[mask]])))
384-
values = to_timedelta(os, coerce=coerce)
383+
values = to_timedelta(os, errors='coerce')
385384
elif inferred_type == 'floating':
386385

387386
# all nan, so ok, use the other dtype (e.g. timedelta or datetime)

pandas/io/parsers.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2057,14 +2057,15 @@ def converter(*date_cols):
20572057
utc=None,
20582058
box=False,
20592059
dayfirst=dayfirst,
2060+
errors='ignore',
20602061
infer_datetime_format=infer_datetime_format
20612062
)
20622063
except:
20632064
return tools.to_datetime(
20642065
lib.try_parse_dates(strs, dayfirst=dayfirst))
20652066
else:
20662067
try:
2067-
result = tools.to_datetime(date_parser(*date_cols))
2068+
result = tools.to_datetime(date_parser(*date_cols), errors='ignore')
20682069
if isinstance(result, datetime.datetime):
20692070
raise Exception('scalar parser')
20702071
return result
@@ -2073,7 +2074,8 @@ def converter(*date_cols):
20732074
return tools.to_datetime(
20742075
lib.try_parse_dates(_concat_date_cols(date_cols),
20752076
parser=date_parser,
2076-
dayfirst=dayfirst))
2077+
dayfirst=dayfirst),
2078+
errors='ignore')
20772079
except Exception:
20782080
return generic_parser(date_parser, *date_cols)
20792081

pandas/io/sql.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,17 @@ def _convert_params(sql, params):
8080

8181
def _handle_date_column(col, format=None):
8282
if isinstance(format, dict):
83-
return to_datetime(col, **format)
83+
return to_datetime(col, errors='ignore', **format)
8484
else:
8585
if format in ['D', 's', 'ms', 'us', 'ns']:
86-
return to_datetime(col, coerce=True, unit=format, utc=True)
86+
return to_datetime(col, errors='coerce', unit=format, utc=True)
8787
elif (issubclass(col.dtype.type, np.floating)
8888
or issubclass(col.dtype.type, np.integer)):
8989
# parse dates as timestamp
9090
format = 's' if format is None else format
91-
return to_datetime(col, coerce=True, unit=format, utc=True)
91+
return to_datetime(col, errors='coerce', unit=format, utc=True)
9292
else:
93-
return to_datetime(col, coerce=True, format=format, utc=True)
93+
return to_datetime(col, errors='coerce', format=format, utc=True)
9494

9595

9696
def _parse_date_columns(data_frame, parse_dates):

pandas/io/tests/test_sql.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def _get_all_tables(self):
216216

217217
def _close_conn(self):
218218
pass
219-
219+
220220
class PandasSQLTest(unittest.TestCase):
221221
"""
222222
Base class with common private methods for SQLAlchemy and fallback cases.
@@ -1271,7 +1271,7 @@ def test_datetime_NaT(self):
12711271
result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn)
12721272
if self.flavor == 'sqlite':
12731273
self.assertTrue(isinstance(result.loc[0, 'A'], string_types))
1274-
result['A'] = to_datetime(result['A'], coerce=True)
1274+
result['A'] = to_datetime(result['A'], errors='coerce')
12751275
tm.assert_frame_equal(result, df)
12761276
else:
12771277
tm.assert_frame_equal(result, df)
@@ -1720,7 +1720,7 @@ class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy):
17201720
pass
17211721

17221722

1723-
class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn):
1723+
class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn):
17241724
pass
17251725

17261726

pandas/io/tests/test_stata.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def test_read_write_reread_dta14(self):
419419
for col in cols:
420420
expected[col] = expected[col].convert_objects(datetime=True, numeric=True)
421421
expected['float_'] = expected['float_'].astype(np.float32)
422-
expected['date_td'] = pd.to_datetime(expected['date_td'], coerce=True)
422+
expected['date_td'] = pd.to_datetime(expected['date_td'], errors='coerce')
423423

424424
parsed_113 = self.read_dta(self.dta14_113)
425425
parsed_113.index.name = 'index'

pandas/tseries/tests/test_offsets.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def test_to_datetime1():
6868

6969
# unparseable
7070
s = 'Month 1, 1999'
71-
assert to_datetime(s) == s
71+
assert to_datetime(s, errors='ignore') == s
7272

7373

7474
def test_normalize_date():

pandas/tseries/tests/test_timedeltas.py

+10
Original file line numberDiff line numberDiff line change
@@ -607,12 +607,22 @@ def testit(unit, transform):
607607
# ms
608608
testit('L',lambda x: 'ms')
609609

610+
def test_to_timedelta_invalid(self):
611+
610612
# these will error
611613
self.assertRaises(ValueError, lambda : to_timedelta([1,2],unit='foo'))
612614
self.assertRaises(ValueError, lambda : to_timedelta(1,unit='foo'))
613615

614616
# time not supported ATM
615617
self.assertRaises(ValueError, lambda :to_timedelta(time(second=1)))
618+
self.assertTrue(to_timedelta(time(second=1), errors='coerce') is pd.NaT)
619+
620+
self.assertRaises(ValueError, lambda : to_timedelta(['foo','bar']))
621+
tm.assert_index_equal(TimedeltaIndex([pd.NaT,pd.NaT]),
622+
to_timedelta(['foo','bar'], errors='coerce'))
623+
624+
tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']),
625+
to_timedelta(['1 day','bar','1 min'], errors='coerce'))
616626

617627
def test_to_timedelta_via_apply(self):
618628
# GH 5458

0 commit comments

Comments
 (0)