diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index a07991d69d48b..1b5a4586e59e7 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -197,18 +197,30 @@ or ``format``, use ``to_datetime`` if these are required.
 Invalid Data
 ~~~~~~~~~~~~

-Pass ``coerce=True`` to convert invalid data to ``NaT`` (not a time):
+.. note::
+
+   In version 0.17.0, the default for ``to_datetime`` is now ``errors='raise'``, rather than ``errors='ignore'``. This means
+   that invalid parsing will raise rather than return the original input as in previous versions.
+
+Pass ``errors='coerce'`` to convert invalid data to ``NaT`` (not a time):

 .. ipython:: python
+   :okexcept:
+
+   # this is the default, raise when unparseable
+   to_datetime(['2009-07-31', 'asd'], errors='raise')

-   to_datetime(['2009-07-31', 'asd'])
+   # return the original input when unparseable
+   to_datetime(['2009-07-31', 'asd'], errors='ignore')

-   to_datetime(['2009-07-31', 'asd'], coerce=True)
+   # return NaT for input when unparseable
+   to_datetime(['2009-07-31', 'asd'], errors='coerce')

 Take care, ``to_datetime`` may not act as you expect on mixed data:

 .. ipython:: python
+   :okexcept:

    to_datetime([1, '1'])

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index aec9c37be4b4f..974c6c31535f9 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -65,10 +65,11 @@ Other enhancements
 - Enable `read_hdf` to be used without specifying a key when the HDF file contains a single dataset (:issue:`10443`)
 - ``DatetimeIndex`` can be instantiated using strings contains ``NaT`` (:issue:`7599`)
-- The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has been made consistent" (:issue:`7599`)
+- The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has been made consistent. (:issue:`7599`)
-  Prior to v0.17.0, ``Timestamp`` and ``to_datetime`` may parse year-only datetime-string incorrectly using today's date, otherwise ``DatetimeIndex`` uses the beginning of the year.
-  ``Timestamp`` and ``to_datetime`` may raise ``ValueError`` in some types of datetime-string which ``DatetimeIndex`` can parse, such as quarterly string.
+  Prior to v0.17.0, ``Timestamp`` and ``to_datetime`` may parse year-only datetime-string incorrectly using today's date, otherwise ``DatetimeIndex``
+  uses the beginning of the year. ``Timestamp`` and ``to_datetime`` may raise ``ValueError`` in some types of datetime-string which ``DatetimeIndex``
+  can parse, such as a quarterly string.

   Previous Behavior

@@ -119,6 +120,45 @@ Backwards incompatible API changes

 - Line and kde plot with ``subplots=True`` now uses default colors, not all black. Specify ``color='k'`` to draw all lines in black (:issue:`9894`)

+.. _whatsnew_0170.api_breaking.to_datetime:
+
+Changes to to_datetime and to_timedelta
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The default for ``pd.to_datetime`` error handling has changed to ``errors='raise'``. In prior versions it was ``errors='ignore'``.
+Furthermore, the ``coerce`` argument has been deprecated in favor of ``errors='coerce'``. This means that invalid parsing will raise rather than return the original
+input as in previous versions (:issue:`10636`).
+
+Previous Behavior:
+
+.. code-block:: python
+
+   In [2]: pd.to_datetime(['2009-07-31', 'asd'])
+   Out[2]: array(['2009-07-31', 'asd'], dtype=object)
+
+New Behavior:
+
+.. ipython:: python
+   :okexcept:
+
+   pd.to_datetime(['2009-07-31', 'asd'])
+
+Of course you can coerce this as well.
+ + .. ipython:: python + + to_datetime(['2009-07-31', 'asd'], errors='coerce') + + To keep the previous behaviour, you can use `errors='ignore'`: + + .. ipython:: python + :okexcept: + + to_datetime(['2009-07-31', 'asd'], errors='ignore') + +``pd.to_timedelta`` gained a similar API, of ``errors='raise'|'ignore'|'coerce'``, and the ``coerce`` keyword +has been deprecated in favor of ``errors='coerce'``. + .. _whatsnew_0170.api_breaking.convert_objects: Changes to convert_objects diff --git a/pandas/core/common.py b/pandas/core/common.py index 873e6a79f741e..aaa341240f538 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1903,9 +1903,9 @@ def _possibly_convert_objects(values, # Immediate return if coerce if datetime: - return pd.to_datetime(values, coerce=True, box=False) + return pd.to_datetime(values, errors='coerce', box=False) elif timedelta: - return pd.to_timedelta(values, coerce=True, box=False) + return pd.to_timedelta(values, errors='coerce', box=False) elif numeric: return lib.maybe_convert_numeric(values, set(), coerce_numeric=True) @@ -1958,7 +1958,7 @@ def _possibly_convert_platform(values): return values -def _possibly_cast_to_datetime(value, dtype, coerce=False): +def _possibly_cast_to_datetime(value, dtype, errors='raise'): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ @@ -2002,9 +2002,9 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False): elif np.prod(value.shape) and value.dtype != dtype: try: if is_datetime64: - value = to_datetime(value, coerce=coerce).values + value = to_datetime(value, errors=errors).values elif is_timedelta64: - value = to_timedelta(value, coerce=coerce).values + value = to_timedelta(value, errors=errors).values except (AttributeError, ValueError): pass @@ -2066,7 +2066,7 @@ def _possibly_infer_to_datetimelike(value, convert_dates=False): def _try_datetime(v): # safe coerce to datetime64 try: - return tslib.array_to_datetime(v, raise_=True).reshape(shape) + return tslib.array_to_datetime(v, errors='raise').reshape(shape) except: return v diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2c6a23e492ab2..6a278e0e44306 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -341,7 +341,6 @@ def _convert_to_array(self, values, name=None, other=None): """converts values to ndarray""" from pandas.tseries.timedeltas import to_timedelta - coerce = True if not is_list_like(values): values = np.array([values]) inferred_type = lib.infer_dtype(values) @@ -362,7 +361,7 @@ def _convert_to_array(self, values, name=None, other=None): values = tslib.array_to_datetime(values) elif inferred_type in ('timedelta', 'timedelta64'): # have a timedelta, convert to to ns here - values = to_timedelta(values, coerce=coerce) + values = to_timedelta(values, errors='coerce') elif inferred_type == 'integer': # py3 compat where dtype is 'm' but is an integer if values.dtype.kind == 'm': @@ -381,7 +380,7 @@ def _convert_to_array(self, values, name=None, other=None): "datetime/timedelta operations [{0}]".format( ', '.join([com.pprint_thing(v) for v in values[mask]]))) - values = to_timedelta(os, coerce=coerce) + values = to_timedelta(os, errors='coerce') elif inferred_type == 'floating': # all nan, so ok, use the other dtype (e.g. 
timedelta or datetime) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a6bb115ac0906..275c765c4cb92 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2057,6 +2057,7 @@ def converter(*date_cols): utc=None, box=False, dayfirst=dayfirst, + errors='ignore', infer_datetime_format=infer_datetime_format ) except: @@ -2064,7 +2065,7 @@ def converter(*date_cols): lib.try_parse_dates(strs, dayfirst=dayfirst)) else: try: - result = tools.to_datetime(date_parser(*date_cols)) + result = tools.to_datetime(date_parser(*date_cols), errors='ignore') if isinstance(result, datetime.datetime): raise Exception('scalar parser') return result @@ -2073,7 +2074,8 @@ def converter(*date_cols): return tools.to_datetime( lib.try_parse_dates(_concat_date_cols(date_cols), parser=date_parser, - dayfirst=dayfirst)) + dayfirst=dayfirst), + errors='ignore') except Exception: return generic_parser(date_parser, *date_cols) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 6cc4b73ed7bbe..8eefe4ba98876 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -80,17 +80,17 @@ def _convert_params(sql, params): def _handle_date_column(col, format=None): if isinstance(format, dict): - return to_datetime(col, **format) + return to_datetime(col, errors='ignore', **format) else: if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, coerce=True, unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=True) elif (issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, coerce=True, unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=True) else: - return to_datetime(col, coerce=True, format=format, utc=True) + return to_datetime(col, errors='coerce', format=format, utc=True) def _parse_date_columns(data_frame, parse_dates): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 18dd13f9b896e..859c6d3250121 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -216,7 +216,7 @@ def _get_all_tables(self): def _close_conn(self): pass - + class PandasSQLTest(unittest.TestCase): """ Base class with common private methods for SQLAlchemy and fallback cases. 
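The ``read_csv`` and ``read_sql`` changes above only thread the new keyword through; the user-visible difference is in how a column with unparseable entries comes back. A minimal sketch of that difference, using made-up column values rather than anything from the tests below:

.. code-block:: python

   import pandas as pd

   # hypothetical column with one entry that cannot be parsed as a datetime
   col = pd.Series(['2009-07-31', 'not-a-date'])

   # errors='coerce' (now used by the sql date handling): bad entries become NaT
   pd.to_datetime(col, errors='coerce')

   # errors='ignore' (now used by the csv date converters): the original values come back unchanged
   pd.to_datetime(col, errors='ignore')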
@@ -1271,7 +1271,7 @@ def test_datetime_NaT(self): result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) if self.flavor == 'sqlite': self.assertTrue(isinstance(result.loc[0, 'A'], string_types)) - result['A'] = to_datetime(result['A'], coerce=True) + result['A'] = to_datetime(result['A'], errors='coerce') tm.assert_frame_equal(result, df) else: tm.assert_frame_equal(result, df) @@ -1720,7 +1720,7 @@ class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): pass -class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): +class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): pass diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index cc9ab977241f9..9345b86758c99 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -419,7 +419,7 @@ def test_read_write_reread_dta14(self): for col in cols: expected[col] = expected[col].convert_objects(datetime=True, numeric=True) expected['float_'] = expected['float_'].astype(np.float32) - expected['date_td'] = pd.to_datetime(expected['date_td'], coerce=True) + expected['date_td'] = pd.to_datetime(expected['date_td'], errors='coerce') parsed_113 = self.read_dta(self.dta14_113) parsed_113.index.name = 'index' @@ -464,7 +464,7 @@ def test_timestamp_and_label(self): data_label = 'This is a data file.' with tm.ensure_clean() as path: original.to_stata(path, time_stamp=time_stamp, data_label=data_label) - + with StataReader(path) as reader: parsed_time_stamp = dt.datetime.strptime(reader.time_stamp, ('%d %b %Y %H:%M')) assert parsed_time_stamp == time_stamp diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 680456df104e4..d364206017c7e 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -68,7 +68,7 @@ def test_to_datetime1(): # unparseable s = 'Month 1, 1999' - assert to_datetime(s) == s + assert to_datetime(s, errors='ignore') == s def test_normalize_date(): diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index dd820394d40a0..bcfeeded3abc9 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -607,12 +607,22 @@ def testit(unit, transform): # ms testit('L',lambda x: 'ms') + def test_to_timedelta_invalid(self): + # these will error self.assertRaises(ValueError, lambda : to_timedelta([1,2],unit='foo')) self.assertRaises(ValueError, lambda : to_timedelta(1,unit='foo')) # time not supported ATM self.assertRaises(ValueError, lambda :to_timedelta(time(second=1))) + self.assertTrue(to_timedelta(time(second=1), errors='coerce') is pd.NaT) + + self.assertRaises(ValueError, lambda : to_timedelta(['foo','bar'])) + tm.assert_index_equal(TimedeltaIndex([pd.NaT,pd.NaT]), + to_timedelta(['foo','bar'], errors='coerce')) + + tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']), + to_timedelta(['1 day','bar','1 min'], errors='coerce')) def test_to_timedelta_via_apply(self): # GH 5458 diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 85aaf32e4dae2..26acbb2073ab8 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -851,7 +851,11 @@ def test_string_na_nat_conversion(self): tm.assert_numpy_array_equal(result, result2) malformed = np.array(['1/100/2000', np.nan], dtype=object) - result = to_datetime(malformed) + + # GH 10636, default is now 'raise' + self.assertRaises(ValueError, lambda : 
to_datetime(malformed, errors='raise')) + + result = to_datetime(malformed, errors='ignore') tm.assert_numpy_array_equal(result, malformed) self.assertRaises(ValueError, to_datetime, malformed, @@ -920,9 +924,9 @@ def test_to_datetime_with_apply(self): td = pd.Series(['May 04', 'Jun 02', ''], index=[1,2,3]) self.assertRaises(ValueError, lambda : pd.to_datetime(td,format='%b %y', errors='raise')) self.assertRaises(ValueError, lambda : td.apply(pd.to_datetime, format='%b %y', errors='raise')) - expected = pd.to_datetime(td, format='%b %y', coerce=True) + expected = pd.to_datetime(td, format='%b %y', errors='coerce') - result = td.apply(lambda x: pd.to_datetime(x, format='%b %y', coerce=True)) + result = td.apply(lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) assert_series_equal(result, expected) def test_nat_vector_field_access(self): @@ -1002,7 +1006,7 @@ def test_to_datetime_types(self): def test_to_datetime_unprocessable_input(self): # GH 4928 self.assert_numpy_array_equal( - to_datetime([1, '1']), + to_datetime([1, '1'], errors='ignore'), np.array([1, '1'], dtype='O') ) self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') @@ -1048,7 +1052,7 @@ def test_to_datetime_dt64s(self): for dt in oob_dts: self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') self.assertRaises(ValueError, tslib.Timestamp, dt) - self.assertIs(pd.to_datetime(dt, coerce=True), NaT) + self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) def test_to_datetime_array_of_dt64s(self): dts = [ @@ -1070,12 +1074,11 @@ def test_to_datetime_array_of_dt64s(self): ValueError, pd.to_datetime, dts_with_oob, - coerce=False, errors='raise' ) self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, coerce=True), + pd.to_datetime(dts_with_oob, box=False, errors='coerce'), np.array( [ Timestamp(dts_with_oob[0]).asm8, @@ -1086,11 +1089,11 @@ def test_to_datetime_array_of_dt64s(self): ) ) - # With coerce=False and errors='ignore', out of bounds datetime64s + # With errors='ignore', out of bounds datetime64s # are converted to their .item(), which depending on the version of # numpy is either a python datetime.datetime or datetime.date self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, coerce=False), + pd.to_datetime(dts_with_oob, box=False, errors='ignore'), np.array( [dt.item() for dt in dts_with_oob], dtype='O' @@ -4188,11 +4191,11 @@ def test_to_datetime_format_YYYYMMDD(self): # coercion # GH 7930 s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s,format='%Y%m%d') + result = pd.to_datetime(s,format='%Y%m%d',errors='ignore') expected = np.array([ datetime(2012,12,31), datetime(2014,12,31), datetime(9999,12,31) ], dtype=object) self.assert_numpy_array_equal(result, expected) - result = pd.to_datetime(s,format='%Y%m%d', coerce=True) + result = pd.to_datetime(s,format='%Y%m%d', errors='coerce') expected = Series(['20121231','20141231','NaT'],dtype='M8[ns]') assert_series_equal(result, expected) @@ -4521,25 +4524,37 @@ def test_second(self): class TestDaysInMonth(tm.TestCase): - # tests for issue #10154 + def test_coerce_deprecation(self): - def test_day_not_in_month_coerce_true_NaT(self): - self.assertTrue(isnull(to_datetime('2015-02-29', coerce=True))) - self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True))) - self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", coerce=True))) - self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True))) - - def 
test_day_not_in_month_coerce_false_raise(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', coerce=False) - self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', format="%Y-%m-%d", coerce=False) - self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d", coerce=False) - self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d", coerce=False) - - def test_day_not_in_month_coerce_false_ignore(self): - self.assertEqual(to_datetime('2015-02-29', errors='ignore', coerce=False), '2015-02-29') - self.assertEqual(to_datetime('2015-02-29', errors='ignore', format="%Y-%m-%d", coerce=False), '2015-02-29') - self.assertEqual(to_datetime('2015-02-32', errors='ignore', format="%Y-%m-%d", coerce=False), '2015-02-32') - self.assertEqual(to_datetime('2015-04-31', errors='ignore', format="%Y-%m-%d", coerce=False), '2015-04-31') + # deprecation of coerce + with tm.assert_produces_warning(FutureWarning): + to_datetime('2015-02-29', coerce=True) + with tm.assert_produces_warning(FutureWarning): + self.assertRaises(ValueError, lambda : to_datetime('2015-02-29', coerce=False)) + + # multiple arguments + for e, c in zip(['raise','ignore','coerce'],[True,False]): + with tm.assert_produces_warning(FutureWarning): + self.assertRaises(TypeError, lambda : to_datetime('2015-02-29', errors=e, coerce=c)) + + # tests for issue #10154 + def test_day_not_in_month_coerce(self): + self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", errors='coerce'))) + + def test_day_not_in_month_raise(self): + self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise') + self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', format="%Y-%m-%d") + self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d") + self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d") + + def test_day_not_in_month_ignore(self): + self.assertEqual(to_datetime('2015-02-29', errors='ignore'), '2015-02-29') + self.assertEqual(to_datetime('2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') + self.assertEqual(to_datetime('2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') + self.assertEqual(to_datetime('2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 397d3f7d2656f..85bae42e7a492 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -652,10 +652,10 @@ def test_number_looking_strings_not_into_datetime(self): # These strings don't look like datetimes so they shouldn't be # attempted to be converted arr = np.array(['-352.737091', '183.575577'], dtype=object) - self.assert_numpy_array_equal(tslib.array_to_datetime(arr), arr) + self.assert_numpy_array_equal(tslib.array_to_datetime(arr, errors='ignore'), arr) arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - self.assert_numpy_array_equal(tslib.array_to_datetime(arr), arr) + self.assert_numpy_array_equal(tslib.array_to_datetime(arr, errors='ignore'), arr) def 
test_coercing_dates_outside_of_datetime64_ns_bounds(self): invalid_dates = [ @@ -671,13 +671,12 @@ def test_coercing_dates_outside_of_datetime64_ns_bounds(self): ValueError, tslib.array_to_datetime, np.array([invalid_date], dtype='object'), - coerce=False, - raise_=True, + errors='raise', ) self.assertTrue( np.array_equal( tslib.array_to_datetime( - np.array([invalid_date], dtype='object'), coerce=True + np.array([invalid_date], dtype='object'), errors='coerce', ), np.array([tslib.iNaT], dtype='M8[ns]') ) @@ -685,7 +684,7 @@ def test_coercing_dates_outside_of_datetime64_ns_bounds(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, coerce=True), + tslib.array_to_datetime(arr, errors='coerce'), np.array( [ tslib.iNaT, @@ -700,11 +699,11 @@ def test_coerce_of_invalid_datetimes(self): # Without coercing, the presence of any invalid dates prevents # any values from being converted - self.assert_numpy_array_equal(tslib.array_to_datetime(arr), arr) + self.assert_numpy_array_equal(tslib.array_to_datetime(arr,errors='ignore'), arr) # With coercing, the invalid dates becomes iNaT self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, coerce=True), + tslib.array_to_datetime(arr, errors='coerce'), np.array( [ '2013-01-01T00:00:00.000000000-0000', diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 60005ef6f2d6f..886d6ff42ced6 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -9,8 +9,11 @@ from pandas.core.common import (ABCSeries, is_integer_dtype, is_timedelta64_dtype, is_list_like, isnull, _ensure_object) +from pandas.util.decorators import deprecate_kwarg -def to_timedelta(arg, unit='ns', box=True, coerce=False): +@deprecate_kwarg(old_arg_name='coerce', new_arg_name='errors', + mapping={True: 'coerce', False: 'raise'}) +def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None): """ Convert argument to timedelta @@ -19,9 +22,12 @@ def to_timedelta(arg, unit='ns', box=True, coerce=False): arg : string, timedelta, array of strings (with possible NAs) unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an integer/float number box : boolean, default True - If True returns a Timedelta/TimedeltaIndex of the results - if False returns a np.timedelta64 or ndarray of values of dtype timedelta64[ns] - coerce : force errors to NaT (False by default) + - If True returns a Timedelta/TimedeltaIndex of the results + - if False returns a np.timedelta64 or ndarray of values of dtype timedelta64[ns] + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as NaT + - If 'ignore', then invalid parsing will return the input Returns ------- @@ -40,7 +46,7 @@ def _convert_listlike(arg, box, unit): elif is_integer_dtype(arg): value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]', copy=False) else: - value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce) + value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, errors=errors) value = value.astype('timedelta64[ns]', copy=False) if box: @@ -58,7 +64,7 @@ def _convert_listlike(arg, box, unit): return _convert_listlike(arg, box=box, unit=unit) # ...so it must be a scalar value. Return scalar. 
- return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box, coerce=coerce) + return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box, errors=errors) _unit_map = { 'Y' : 'Y', @@ -96,10 +102,10 @@ def _validate_timedelta_unit(arg): return 'ns' raise ValueError("invalid timedelta unit {0} provided".format(arg)) -def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, coerce=False): +def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): """ convert strings to timedelta; coerce to Timedelta (if box), else np.timedelta64""" - result = tslib.convert_to_timedelta(r,unit,coerce) + result = tslib.convert_to_timedelta(r,unit,errors) if box: result = tslib.Timedelta(result) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 6a1dd934d6bce..6f08448b47b1e 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -9,6 +9,7 @@ import pandas.core.common as com from pandas.compat import StringIO, callable import pandas.compat as compat +from pandas.util.decorators import deprecate_kwarg try: import dateutil @@ -171,8 +172,10 @@ def _guess_datetime_format_for_array(arr, **kwargs): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) -def to_datetime(arg, errors='ignore', dayfirst=False, yearfirst=False, - utc=None, box=True, format=None, exact=True, coerce=False, +@deprecate_kwarg(old_arg_name='coerce', new_arg_name='errors', + mapping={True: 'coerce', False: 'raise'}) +def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, + utc=None, box=True, format=None, exact=True, coerce=None, unit='ns', infer_datetime_format=False): """ Convert argument to datetime. @@ -180,8 +183,10 @@ def to_datetime(arg, errors='ignore', dayfirst=False, yearfirst=False, Parameters ---------- arg : string, datetime, array of strings (with possible NAs) - errors : {'ignore', 'raise'}, default 'ignore' - Errors are ignored by default (values left untouched). + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as NaT + - If 'ignore', then invalid parsing will return the input dayfirst : boolean, default False Specify a date parse order if `arg` is str or its list-likes. If True, parses dates with the day first, eg 10/11/12 is parsed as 2012-11-10. @@ -189,24 +194,22 @@ def to_datetime(arg, errors='ignore', dayfirst=False, yearfirst=False, with day first (this is a known bug, based on dateutil behavior). yearfirst : boolean, default False Specify a date parse order if `arg` is str or its list-likes. - If True parses dates with the year first, eg 10/11/12 is parsed as 2010-11-12. - If both dayfirst and yearfirst are True, yearfirst is preceded (same as dateutil). + - If True parses dates with the year first, eg 10/11/12 is parsed as 2010-11-12. + - If both dayfirst and yearfirst are True, yearfirst is preceded (same as dateutil). Warning: yearfirst=True is not strict, but will prefer to parse with year first (this is a known bug, based on dateutil beahavior). utc : boolean, default None Return UTC DatetimeIndex if True (converting any tz-aware datetime.datetime objects as well). box : boolean, default True - If True returns a DatetimeIndex, if False returns ndarray of values. + - If True returns a DatetimeIndex + - If False returns ndarray of values. format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse all the way up to nanoseconds. 
exact : boolean, True by default - If True, require an exact format match. - If False, allow the format to match anywhere in the target string. - coerce : force errors to NaT (False by default) - Timestamps outside the interval between Timestamp.min and Timestamp.max - (approximately 1677-09-22 to 2262-04-11) will be also forced to NaT. + - If True, require an exact format match. + - If False, allow the format to match anywhere in the target string. unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch (e.g. a unix timestamp), which is an integer/float number. infer_datetime_format : boolean, default False @@ -256,16 +259,16 @@ def to_datetime(arg, errors='ignore', dayfirst=False, yearfirst=False, >>> pd.to_datetime('13000101', format='%Y%m%d') datetime.datetime(1300, 1, 1, 0, 0) - >>> pd.to_datetime('13000101', format='%Y%m%d', coerce=True) + >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') NaT """ return _to_datetime(arg, errors=errors, dayfirst=dayfirst, yearfirst=yearfirst, - utc=utc, box=box, format=format, exact=exact, coerce=coerce, + utc=utc, box=box, format=format, exact=exact, unit=unit, infer_datetime_format=infer_datetime_format) -def _to_datetime(arg, errors='ignore', dayfirst=False, yearfirst=False, - utc=None, box=True, format=None, exact=True, coerce=False, +def _to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, + utc=None, box=True, format=None, exact=True, unit='ns', freq=None, infer_datetime_format=False): """ Same as to_datetime, but accept freq for @@ -322,7 +325,7 @@ def _convert_listlike(arg, box, format): # shortcut formatting here if format == '%Y%m%d': try: - result = _attempt_YYYYMMDD(arg, coerce=coerce) + result = _attempt_YYYYMMDD(arg, errors=errors) except: raise ValueError("cannot convert the input to '%Y%m%d' date format") @@ -330,8 +333,7 @@ def _convert_listlike(arg, box, format): if result is None: try: result = tslib.array_strptime( - arg, format, exact=exact, coerce=coerce - ) + arg, format, exact=exact, errors=errors) except (tslib.OutOfBoundsDatetime): if errors == 'raise': raise @@ -346,10 +348,10 @@ def _convert_listlike(arg, box, format): result = arg if result is None and (format is None or infer_datetime_format): - result = tslib.array_to_datetime(arg, raise_=errors=='raise', + result = tslib.array_to_datetime(arg, errors=errors, utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, freq=freq, - coerce=coerce, unit=unit, + unit=unit, require_iso8601=require_iso8601) if com.is_datetime64_dtype(result) and box: @@ -376,14 +378,20 @@ def _convert_listlike(arg, box, format): return _convert_listlike(np.array([ arg ]), box, format)[0] -def _attempt_YYYYMMDD(arg, coerce): +def _attempt_YYYYMMDD(arg, errors): """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, - arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) """ + arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. 
with nan) + + Parameters + ---------- + arg : passed value + errors : 'raise','ignore','coerce' + """ def calc(carg): # calculate the actual result carg = carg.astype(object) - return tslib.array_to_datetime(lib.try_parse_year_month_day(carg/10000,carg/100 % 100, carg % 100), coerce=coerce) + return tslib.array_to_datetime(lib.try_parse_year_month_day(carg/10000,carg/100 % 100, carg % 100), errors=errors) def calc_with_mask(carg,mask): result = np.empty(carg.shape, dtype='M8[ns]') diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index da7cc05621775..bf134a0a6d996 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1806,9 +1806,9 @@ cpdef object _get_rule_month(object source, object default='DEC'): return source.split('-')[1] -cpdef array_to_datetime(ndarray[object] values, raise_=False, +cpdef array_to_datetime(ndarray[object] values, errors='raise', dayfirst=False, yearfirst=False, freq=None, - format=None, utc=None, coerce=False, unit=None, + format=None, utc=None, unit=None, require_iso8601=False): cdef: Py_ssize_t i, n = len(values) @@ -1817,10 +1817,14 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, ndarray[object] oresult pandas_datetimestruct dts bint utc_convert = bool(utc), seen_integer=0, seen_datetime=0 + bint is_raise=errors=='raise', is_ignore=errors=='ignore', is_coerce=errors=='coerce' _TSObject _ts int64_t m = cast_from_unit(None,unit) int out_local = 0, out_tzoffset = 0 + # specify error conditions + assert is_raise or is_ignore or is_coerce + try: result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') @@ -1837,7 +1841,7 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, try: _check_dts_bounds(&_ts.dts) except ValueError: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise @@ -1852,7 +1856,7 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, try: _check_dts_bounds(&dts) except ValueError: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise @@ -1862,7 +1866,7 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, _check_dts_bounds(&dts) seen_datetime=1 except ValueError: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise @@ -1874,19 +1878,19 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, iresult[i] = _get_datetime64_nanos(val) seen_datetime=1 except ValueError: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise # if we are coercing, dont' allow integers - elif is_integer_object(val) and not coerce: + elif is_integer_object(val) and not is_coerce: if val == iNaT: iresult[i] = iNaT else: iresult[i] = val*m seen_integer=1 - elif is_float_object(val) and not coerce: + elif is_float_object(val) and not is_coerce: if val != val or val == iNaT: iresult[i] = iNaT else: @@ -1911,10 +1915,10 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, except ValueError: # if requiring iso8601 strings, skip trying other formats if require_iso8601: - if coerce: + if is_coerce: iresult[i] = iNaT continue - elif raise_: + elif is_raise: raise ValueError("time data %r does match format specified" % (val,)) else: @@ -1924,34 +1928,34 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst, freq=freq) except Exception: - if coerce: + if is_coerce: iresult[i] = iNaT continue - raise TypeError + raise TypeError("invalid string coercion to datetime") try: _ts = convert_to_tsobject(py_dt, None, None) iresult[i] = _ts.value except ValueError: - if coerce: + if is_coerce: iresult[i] 
= iNaT continue raise except: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise # don't allow mixed integers and datetime like - # higher levels can catch and coerce to object, for + # higher levels can catch and is_coerce to object, for # example if seen_integer and seen_datetime: raise ValueError("mixed datetimes and integers in passed array") return result except OutOfBoundsDatetime: - if raise_: + if is_raise: raise oresult = np.empty(n, dtype=object) @@ -1987,12 +1991,12 @@ cpdef array_to_datetime(ndarray[object] values, raise_=False, _pydatetime_to_dts(oresult[i], &dts) _check_dts_bounds(&dts) except Exception: - if raise_: + if is_raise: raise return values # oresult[i] = val else: - if raise_: + if is_raise: raise return values @@ -2548,13 +2552,16 @@ cdef PyTypeObject* td_type = Timedelta cdef inline bint is_timedelta(object o): return Py_TYPE(o) == td_type # isinstance(o, Timedelta) -def array_to_timedelta64(ndarray[object] values, unit='ns', coerce=False): +def array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): """ convert an ndarray to an array of ints that are timedeltas force conversion if coerce = True, else will raise if cannot convert """ cdef: Py_ssize_t i, n ndarray[int64_t] iresult + bint is_raise=errors=='raise', is_ignore=errors=='ignore', is_coerce=errors=='coerce' + + assert is_raise or is_ignore or is_coerce n = values.shape[0] result = np.empty(n, dtype='m8[ns]') @@ -2564,15 +2571,18 @@ def array_to_timedelta64(ndarray[object] values, unit='ns', coerce=False): # if so then we hit the fast path try: for i in range(n): - result[i] = parse_timedelta_string(values[i], coerce) + result[i] = parse_timedelta_string(values[i], is_coerce) except: for i in range(n): - result[i] = convert_to_timedelta64(values[i], unit, coerce) + result[i] = convert_to_timedelta64(values[i], unit, is_coerce) return iresult -def convert_to_timedelta(object ts, object unit='ns', coerce=False): - return convert_to_timedelta64(ts, unit, coerce) +def convert_to_timedelta(object ts, object unit='ns', errors='raise'): + cdef bint is_raise=errors=='raise', is_ignore=errors=='ignore', is_coerce=errors=='coerce' + + assert is_raise or is_ignore or is_coerce + return convert_to_timedelta64(ts, unit, is_coerce) cdef dict timedelta_abbrevs = { 'd' : 'd', 'days' : 'd', @@ -2892,7 +2902,7 @@ cdef inline convert_to_timedelta64(object ts, object unit, object coerce): raise ValueError("Invalid type for timedelta scalar: %s" % type(ts)) return ts.astype('timedelta64[ns]') -def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coerce=False): +def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='raise'): """ Parameters ---------- @@ -2911,6 +2921,9 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe int64_t us, ns object val, group_key, ampm, found dict found_key + bint is_raise=errors=='raise', is_ignore=errors=='ignore', is_coerce=errors=='coerce' + + assert is_raise or is_ignore or is_coerce global _TimeRE_cache, _regex_cache with _cache_lock: @@ -2983,13 +2996,13 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe if exact: found = format_regex.match(val) if not found: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise ValueError("time data %r does not match format %r (match)" % (values[i], fmt)) if len(val) != found.end(): - if coerce: + if is_coerce: iresult[i] = iNaT continue raise ValueError("unconverted data remains: %s" % @@ -2999,7 +3012,7 @@ 
def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe else: found = format_regex.search(val) if not found: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise ValueError("time data %r does not match format %r (search)" % @@ -3134,7 +3147,7 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe month = datetime_result.month day = datetime_result.day except ValueError: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise @@ -3154,7 +3167,7 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe try: _check_dts_bounds(&dts) except ValueError: - if coerce: + if is_coerce: iresult[i] = iNaT continue raise diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 9cd538511e946..4544c3cdb8919 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -43,7 +43,7 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None): FutureWarning: cols is deprecated, use columns instead warnings.warn(msg, FutureWarning) should raise warning - >>> f(cols='should error', columns="can't pass do both") + >>> f(cols='should error', columns="can\'t pass do both") TypeError: Can only specify 'cols' or 'columns', not both >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) ... def f(new=False): @@ -78,6 +78,7 @@ def wrapper(*args, **kwargs): new_arg_value = old_arg_value msg = "the '%s' keyword is deprecated, " \ "use '%s' instead" % (old_arg_name, new_arg_name) + warnings.warn(msg, FutureWarning) if kwargs.get(new_arg_name, None) is not None: msg = "Can only specify '%s' or '%s', not both" % \ @@ -287,4 +288,3 @@ def make_signature(func) : if spec.keywords: args.append('**' + spec.keywords) return args, spec.args -
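With the ``deprecate_kwarg`` mapping above (``{True: 'coerce', False: 'raise'}``), the old ``coerce`` keyword still works for now but warns. A rough sketch of what callers should see, following the behaviour asserted in ``test_coerce_deprecation`` (the exact warning text may differ):

.. code-block:: python

   import warnings
   import pandas as pd

   with warnings.catch_warnings(record=True) as w:
       warnings.simplefilter('always')
       # old spelling: coerce=True is remapped to errors='coerce'
       result = pd.to_datetime('2015-02-29', coerce=True)

   result            # NaT, since 2015-02-29 is not a valid date
   w[-1].category    # FutureWarning, pointing at the 'errors' keyword

   # specifying both keywords is rejected
   # pd.to_datetime('2015-02-29', coerce=True, errors='coerce')  # raises TypeError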