From 4d6e0d3c48dd4896fc6f0d9b33e02448c6f4f033 Mon Sep 17 00:00:00 2001 From: Chris Bertinato Date: Fri, 8 Mar 2019 10:34:02 -0500 Subject: [PATCH] DEPR: Deprecate box kwarg for to_timedelta and to_datetime --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/dtypes/cast.py | 4 +- pandas/core/indexes/datetimelike.py | 3 +- pandas/core/tools/datetimes.py | 8 ++ pandas/core/tools/timedeltas.py | 8 ++ pandas/io/parsers.py | 4 +- pandas/tests/indexes/datetimes/test_tools.py | 95 ++++++++++--------- pandas/tests/indexes/timedeltas/test_tools.py | 89 ++++++++++------- .../tests/scalar/timedelta/test_timedelta.py | 8 +- 9 files changed, 131 insertions(+), 89 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 284943cf49070..76ee268f2d3e6 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -94,6 +94,7 @@ Deprecations ~~~~~~~~~~~~ - Deprecated the `M (months)` and `Y (year)` `units` parameter of :func: `pandas.to_timedelta`, :func: `pandas.Timedelta` and :func: `pandas.TimedeltaIndex` (:issue:`16344`) +- Deprecated the ``box`` keyword of :func:`pandas.to_datetime` and :func:`pandas.to_timedelta`. Instead, use :meth:`to_numpy` to get an ndarray of values, or :meth:`Timestamp.to_datetime64`/:meth:`Timedelta.to_timedelta64` to get a ``numpy.datetime64``/``numpy.timedelta64`` scalar. (:issue:`24416`) .. 
_whatsnew_0250.prior_deprecations: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f6561948df99a..1823a8e8654fd 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -794,10 +794,10 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, # Immediate return if coerce if datetime: from pandas import to_datetime - return to_datetime(values, errors='coerce', box=False) + return to_datetime(values, errors='coerce').to_numpy() elif timedelta: from pandas import to_timedelta - return to_timedelta(values, errors='coerce', box=False) + return to_timedelta(values, errors='coerce').to_numpy() elif numeric: from pandas import to_numeric return to_numeric(values, errors='coerce') diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index aa7332472fc07..830f234b85757 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -300,7 +300,8 @@ def asobject(self): return self.astype(object) def _convert_tolerance(self, tolerance, target): - tolerance = np.asarray(to_timedelta(tolerance, box=False)) + tolerance = np.asarray(to_timedelta(tolerance).to_numpy()) + if target.size != tolerance.size and tolerance.size > 1: raise ValueError('list-like tolerance size must match ' 'target index size') diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0c76ac6cd75ac..64e06787db6fe 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -9,6 +9,7 @@ DateParseError, _format_is_iso, _guess_datetime_format, parse_time_string) from pandas._libs.tslibs.strptime import array_strptime from pandas.compat import zip +from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.common import ( ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype, @@ -398,6 +399,7 @@ def _adjust_to_origin(arg, origin, unit): return arg +@deprecate_kwarg(old_arg_name='box', new_arg_name=None) def 
to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', @@ -444,6 +446,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, - If True returns a DatetimeIndex or Index-like object - If False returns ndarray of values. + + .. deprecated:: 0.25.0 + Use :meth:`.to_numpy` or :meth:`Timestamp.to_datetime64` + instead to get an ndarray of values or numpy.datetime64, + respectively. + format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse all the way up to nanoseconds. diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 7ebaf3056e79e..41dca3bfe7500 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -8,6 +8,7 @@ from pandas._libs.tslibs import NaT from pandas._libs.tslibs.timedeltas import Timedelta, parse_timedelta_unit +from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries @@ -15,6 +16,7 @@ from pandas.core.arrays.timedeltas import sequence_to_td64ns +@deprecate_kwarg(old_arg_name='box', new_arg_name=None) def to_timedelta(arg, unit='ns', box=True, errors='raise'): """ Convert argument to timedelta. @@ -40,6 +42,12 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): - If True returns a Timedelta/TimedeltaIndex of the results. - If False returns a numpy.timedelta64 or numpy.darray of values of dtype timedelta64[ns]. + + .. deprecated:: 0.25.0 + Use :meth:`.to_numpy` or :meth:`Timedelta.to_timedelta64` + instead to get an ndarray of values or numpy.timedelta64, + respectively. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception. - If 'coerce', then invalid parsing will be set as NaT. 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4163a571df800..5f33c387769ee 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3164,11 +3164,11 @@ def converter(*date_cols): return tools.to_datetime( ensure_object(strs), utc=None, - box=False, dayfirst=dayfirst, errors='ignore', infer_datetime_format=infer_datetime_format - ) + ).to_numpy() + except ValueError: return tools.to_datetime( parsing.try_parse_dates(strs, dayfirst=dayfirst)) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index dd914d8a79837..1a1e33bd508fc 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -184,9 +184,6 @@ def test_to_datetime_format_weeks(self, cache): for s, format, dt in data: assert to_datetime(s, format=format, cache=cache) == dt - @pytest.mark.parametrize("box,const", [ - [True, pd.Index], - [False, np.array]]) @pytest.mark.parametrize("fmt,dates,expected_dates", [ ['%Y-%m-%d %H:%M:%S %Z', ['2010-01-01 12:00:00 UTC'] * 2, @@ -218,15 +215,15 @@ def test_to_datetime_format_weeks(self, cache): tzinfo=pytz.FixedOffset(0)), # pytz coerces to UTC pd.Timestamp('2010-01-01 12:00:00', tzinfo=pytz.FixedOffset(0))]]]) - def test_to_datetime_parse_tzname_or_tzoffset(self, box, const, - fmt, dates, expected_dates): + def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, + expected_dates): # GH 13486 - result = pd.to_datetime(dates, format=fmt, box=box) - expected = const(expected_dates) + result = pd.to_datetime(dates, format=fmt) + expected = pd.Index(expected_dates) tm.assert_equal(result, expected) with pytest.raises(ValueError): - pd.to_datetime(dates, format=fmt, box=box, utc=True) + pd.to_datetime(dates, format=fmt, utc=True) @pytest.mark.parametrize('offset', [ '+0', '-1foo', 'UTCbar', ':10', '+01:000:01', '']) @@ -256,7 +253,7 @@ def test_to_datetime_dtarr(self, tz): result = to_datetime(arr) assert result is arr - result = 
to_datetime(arr, box=True) + result = to_datetime(arr) assert result is arr def test_to_datetime_pydatetime(self): @@ -363,9 +360,9 @@ def test_to_datetime_array_of_dt64s(self, cache): # Assuming all datetimes are in bounds, to_datetime() returns # an array that is equal to Timestamp() parsing - tm.assert_numpy_array_equal( - pd.to_datetime(dts, box=False, cache=cache), - np.array([Timestamp(x).asm8 for x in dts]) + tm.assert_index_equal( + pd.to_datetime(dts, cache=cache), + pd.DatetimeIndex([Timestamp(x).asm8 for x in dts]) ) # A list of datetimes where the last one is out of bounds @@ -375,28 +372,26 @@ def test_to_datetime_array_of_dt64s(self, cache): with pytest.raises(OutOfBoundsDatetime, match=msg): pd.to_datetime(dts_with_oob, errors='raise') - tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce', + tm.assert_index_equal( + pd.to_datetime(dts_with_oob, errors='coerce', cache=cache), - np.array( + pd.DatetimeIndex( [ Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8, - tslib.iNaT, - ], - dtype='M8' + pd.NaT + ] ) ) # With errors='ignore', out of bounds datetime64s # are converted to their .item(), which depending on the version of # numpy is either a python datetime.datetime or datetime.date - tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='ignore', + tm.assert_index_equal( + pd.to_datetime(dts_with_oob, errors='ignore', cache=cache), - np.array( - [dt.item() for dt in dts_with_oob], - dtype='O' + pd.Index( + [dt.item() for dt in dts_with_oob] ) ) @@ -622,20 +617,16 @@ def test_datetime_invalid_index(self, values, format, infer): @pytest.mark.parametrize("utc", [True, None]) @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) - @pytest.mark.parametrize("box", [True, False]) @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index]) - def test_to_datetime_cache(self, utc, format, box, constructor): + def test_to_datetime_cache(self, utc, format, 
constructor): date = '20130101 00:00:00' test_dates = [date] * 10**5 data = constructor(test_dates) - result = pd.to_datetime(data, utc=utc, format=format, box=box, - cache=True) - expected = pd.to_datetime(data, utc=utc, format=format, box=box, - cache=False) - if box: - tm.assert_index_equal(result, expected) - else: - tm.assert_numpy_array_equal(result, expected) + + result = pd.to_datetime(data, utc=utc, format=format, cache=True) + expected = pd.to_datetime(data, utc=utc, format=format, cache=False) + + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("utc", [True, None]) @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) @@ -684,7 +675,10 @@ def test_iso_8601_strings_with_same_offset(self): def test_iso_8601_strings_same_offset_no_box(self): # GH 22446 data = ['2018-01-04 09:01:00+09:00', '2018-01-04 09:02:00+09:00'] - result = pd.to_datetime(data, box=False) + + with tm.assert_produces_warning(FutureWarning): + result = pd.to_datetime(data, box=False) + expected = np.array([ datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)), datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540)) @@ -753,6 +747,16 @@ def test_timestamp_utc_true(self, ts, expected): result = to_datetime(ts, utc=True) assert result == expected + def test_to_datetime_box_deprecated(self): + expected = np.datetime64('2018-09-09') + + # Deprecated - see GH24416 + with tm.assert_produces_warning(FutureWarning): + pd.to_datetime(expected, box=False) + + result = pd.to_datetime(expected).to_datetime64() + assert result == expected + class TestToDatetimeUnit(object): @pytest.mark.parametrize('cache', [True, False]) @@ -891,7 +895,7 @@ def test_unit_rounding(self, cache): def test_unit_ignore_keeps_name(self, cache): # GH 21697 expected = pd.Index([15e9] * 2, name='name') - result = pd.to_datetime(expected, errors='ignore', box=True, unit='s', + result = pd.to_datetime(expected, errors='ignore', unit='s', cache=cache) tm.assert_index_equal(result, expected) @@ 
-1052,7 +1056,10 @@ def test_dataframe_box_false(self): df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) - result = pd.to_datetime(df, box=False) + + with tm.assert_produces_warning(FutureWarning): + result = pd.to_datetime(df, box=False) + expected = np.array(['2015-02-04', '2016-03-05'], dtype='datetime64[ns]') tm.assert_numpy_array_equal(result, expected) @@ -1069,8 +1076,7 @@ def test_dataframe_utc_true(self): def test_to_datetime_errors_ignore_utc_true(self): # GH 23758 - result = pd.to_datetime([1], unit='s', box=True, utc=True, - errors='ignore') + result = pd.to_datetime([1], unit='s', utc=True, errors='ignore') expected = DatetimeIndex(['1970-01-01 00:00:01'], tz='UTC') tm.assert_index_equal(result, expected) @@ -1188,19 +1194,16 @@ def test_to_datetime_types(self, cache): # assert result == expected @pytest.mark.parametrize('cache', [True, False]) - @pytest.mark.parametrize('box, klass', [ - [True, Index], - [False, np.array] - ]) - def test_to_datetime_unprocessable_input(self, cache, box, klass): + def test_to_datetime_unprocessable_input(self, cache): # GH 4928 # GH 21864 - result = to_datetime([1, '1'], errors='ignore', cache=cache, box=box) - expected = klass(np.array([1, '1'], dtype='O')) + result = to_datetime([1, '1'], errors='ignore', cache=cache) + + expected = Index(np.array([1, '1'], dtype='O')) tm.assert_equal(result, expected) msg = "invalid string coercion to datetime" with pytest.raises(TypeError, match=msg): - to_datetime([1, '1'], errors='raise', cache=cache, box=box) + to_datetime([1, '1'], errors='raise', cache=cache) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 819184d4b14f3..55664e6ca4323 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -19,15 +19,18 @@ def conv(v): d1 = np.timedelta64(1, 'D') - assert 
(to_timedelta('1 days 06:05:01.00003', box=False) == - conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') + - np.timedelta64(30, 'us'))) - assert (to_timedelta('15.5us', box=False) == - conv(np.timedelta64(15500, 'ns'))) + with tm.assert_produces_warning(FutureWarning): + assert (to_timedelta('1 days 06:05:01.00003', box=False) == + conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') + + np.timedelta64(30, 'us'))) - # empty string - result = to_timedelta('', box=False) - assert result.astype('int64') == iNaT + with tm.assert_produces_warning(FutureWarning): + assert (to_timedelta('15.5us', box=False) == + conv(np.timedelta64(15500, 'ns'))) + + # empty string + result = to_timedelta('', box=False) + assert result.astype('int64') == iNaT result = to_timedelta(['', '']) assert isna(result).all() @@ -37,10 +40,11 @@ def conv(v): expected = pd.Index(np.array([np.timedelta64(1, 's')])) tm.assert_index_equal(result, expected) - # ints - result = np.timedelta64(0, 'ns') - expected = to_timedelta(0, box=False) - assert result == expected + with tm.assert_produces_warning(FutureWarning): + # ints + result = np.timedelta64(0, 'ns') + expected = to_timedelta(0, box=False) + assert result == expected # Series expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) @@ -53,16 +57,18 @@ def conv(v): expected = to_timedelta([0, 10], unit='s') tm.assert_index_equal(result, expected) - # single element conversion - v = timedelta(seconds=1) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - assert result == expected + with tm.assert_produces_warning(FutureWarning): + # single element conversion + v = timedelta(seconds=1) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + assert result == expected - v = np.timedelta64(timedelta(seconds=1)) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - assert result == expected + with 
tm.assert_produces_warning(FutureWarning): + v = np.timedelta64(timedelta(seconds=1)) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + assert result == expected # arrays of various dtypes arr = np.array([1] * 5, dtype='int64') @@ -90,22 +96,27 @@ def conv(v): expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) tm.assert_index_equal(result, expected) - # Test with lists as input when box=false - expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') - result = to_timedelta(range(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) + with tm.assert_produces_warning(FutureWarning): + # Test with lists as input when box=false + expected = np.array(np.arange(3) * 1000000000, + dtype='timedelta64[ns]') + result = to_timedelta(range(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) - result = to_timedelta(np.arange(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) + with tm.assert_produces_warning(FutureWarning): + result = to_timedelta(np.arange(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) - result = to_timedelta([0, 1, 2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) + with tm.assert_produces_warning(FutureWarning): + result = to_timedelta([0, 1, 2], unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) - # Tests with fractional seconds as input: - expected = np.array( - [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') - result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) + with tm.assert_produces_warning(FutureWarning): + # Tests with fractional seconds as input: + expected = np.array( + [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') + result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) def test_to_timedelta_invalid(self): @@ 
-188,3 +199,13 @@ def test_to_timedelta_float(self): result = pd.to_timedelta(arr, unit='s') expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype='int64') tm.assert_numpy_array_equal(result.asi8, expected_asi8) + + def test_to_timedelta_box_deprecated(self): + result = np.timedelta64(0, 'ns') + + # Deprecated - see GH24416 + with tm.assert_produces_warning(FutureWarning): + to_timedelta(0, box=False) + + expected = to_timedelta(0).to_timedelta64() + assert result == expected diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index ee2c2e9e1959c..42ba9bbd87e52 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -318,12 +318,12 @@ def test_iso_conversion(self): assert to_timedelta('P0DT0H0M1S') == expected def test_nat_converters(self): - result = to_timedelta('nat', box=False) - assert result.dtype.kind == 'm' + result = to_timedelta('nat').to_numpy() + assert result.dtype.kind == 'M' assert result.astype('int64') == iNaT - result = to_timedelta('nan', box=False) - assert result.dtype.kind == 'm' + result = to_timedelta('nan').to_numpy() + assert result.dtype.kind == 'M' assert result.astype('int64') == iNaT @pytest.mark.filterwarnings("ignore:M and Y units are deprecated")