From d856bb15463105131eadd68d41b2f7d39900674c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 16 Jan 2019 10:08:58 -0600 Subject: [PATCH 1/3] DEPR/API: Non-ns precision in Index constructors This deprecates passing dtypes without a precision to DatetimeIndex and TimedeltaIndex. ```python In [2]: pd.DatetimeIndex(['2000'], dtype='datetime64') /Users/taugspurger/.virtualenvs/pandas-dev/bin/ipython:1: FutureWarning: Passing in 'datetime64' dtype with no precision is deprecated and will raise in a future version. Please pass in 'datetime64[ns]' instead. #!/Users/taugspurger/Envs/pandas-dev/bin/python3 Out[2]: DatetimeIndex(['2000-01-01'], dtype='datetime64[ns]', freq=None) ``` Previously, we ignored the precision, so that things like ``` In [3]: pd.DatetimeIndex(['2000'], dtype='datetime64[us]') Out[3]: DatetimeIndex(['2000-01-01'], dtype='datetime64[ns]', freq=None) ``` worked. Passing a non-nanosecond precision now raises a ``ValueError`` instead. Closes https://github.com/pandas-dev/pandas/issues/24739 Closes https://github.com/pandas-dev/pandas/issues/24753 --- doc/source/whatsnew/v0.24.0.rst | 2 + pandas/core/arrays/datetimes.py | 11 +++++ pandas/core/arrays/timedeltas.py | 42 +++++++++++++------ pandas/core/indexes/base.py | 3 +- pandas/tests/arrays/test_timedeltas.py | 8 ++-- .../indexes/datetimes/test_construction.py | 17 ++++++++ .../indexes/timedeltas/test_construction.py | 17 ++++++++ 7 files changed, 82 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d69e948e31e33..b6a6090bb12cd 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -436,6 +436,7 @@ Backwards incompatible API changes - Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`) - :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`) - In 
0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`) +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` no longer ignore the dtype precision. Passing a non-nanosecond resolution dtype will raise a ``ValueError`` (:issue:`24753`) Percentage change on groupby ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1310,6 +1311,7 @@ Deprecations - :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`) - Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version. Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`) - ``Series.cat.categorical``, ``Series.cat.name`` and ``Sersies.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`). +- Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`). .. 
_whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a2d67efbecbba..f50b5884a0e05 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from datetime import datetime, time, timedelta +import textwrap import warnings import numpy as np @@ -1986,6 +1987,16 @@ def _validate_dt64_dtype(dtype): """ if dtype is not None: dtype = pandas_dtype(dtype) + + if isinstance(dtype, np.dtype) and dtype == np.dtype("M8"): + # no precision, warn + dtype = _NS_DTYPE + msg = textwrap.dedent("""\ + Passing in 'datetime64' dtype with no precision is deprecated + and will raise in a future version. Please pass in + 'datetime64[ns]' instead.""") + warnings.warn(msg, FutureWarning, stacklevel=5) + if ((isinstance(dtype, np.dtype) and dtype != _NS_DTYPE) or not isinstance(dtype, (np.dtype, DatetimeTZDtype))): raise ValueError("Unexpected value for 'dtype': '{dtype}'. 
" diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index be1a7097b0e0d..ec30394fb4872 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -2,6 +2,7 @@ from __future__ import division from datetime import timedelta +import textwrap import warnings import numpy as np @@ -160,16 +161,8 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): # nanosecond UTC (or tz-naive) unix timestamps values = values.view(_TD_DTYPE) - if values.dtype != _TD_DTYPE: - raise TypeError(_BAD_DTYPE.format(dtype=values.dtype)) - - try: - dtype_mismatch = dtype != _TD_DTYPE - except TypeError: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) - else: - if dtype_mismatch: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) + _validate_td64_dtype(values.dtype) + dtype = _validate_td64_dtype(dtype) if freq == "infer": msg = ( @@ -204,9 +197,8 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): @classmethod def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False, freq=None, unit=None): - if dtype != _TD_DTYPE: - raise ValueError("Only timedelta64[ns] dtype is valid.") - + if dtype: + _validate_td64_dtype(dtype) freq, freq_infer = dtl.maybe_infer_freq(freq) data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) @@ -997,6 +989,30 @@ def objects_to_td64ns(data, unit="ns", errors="raise"): return result.view('timedelta64[ns]') +def _validate_td64_dtype(dtype): + try: + if dtype == np.dtype("timedelta64"): + dtype = _TD_DTYPE + msg = textwrap.dedent("""\ + Passing in 'timedelta' dtype with no precision is deprecated + and will raise in a future version. 
Please pass in + 'timedelta64[ns]' instead.""") + warnings.warn(msg, FutureWarning, stacklevel=4) + except TypeError: + # extension dtype + pass + + try: + dtype_mismatch = dtype != _TD_DTYPE + except TypeError: + raise ValueError(_BAD_DTYPE.format(dtype=dtype)) + else: + if dtype_mismatch: + raise ValueError(_BAD_DTYPE.format(dtype=dtype)) + + return dtype + + def _generate_regular_range(start, end, periods, offset): stride = offset.nanos if periods is None: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5a9bf6c2c6263..0ac280d8346a6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -311,7 +311,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif (is_timedelta64_dtype(data) or (dtype is not None and is_timedelta64_dtype(dtype))): from pandas import TimedeltaIndex - result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) + result = TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, + **kwargs) if dtype is not None and _o_dtype == dtype: return Index(result.to_pytimedelta(), dtype=_o_dtype) else: diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index a8745f78392ca..6b4662ca02e80 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -23,18 +23,18 @@ def test_non_array_raises(self): TimedeltaArray([1, 2, 3]) def test_other_type_raises(self): - with pytest.raises(TypeError, + with pytest.raises(ValueError, match="dtype bool cannot be converted"): TimedeltaArray(np.array([1, 2, 3], dtype='bool')) def test_incorrect_dtype_raises(self): # TODO: why TypeError for 'category' but ValueError for i8? 
- with pytest.raises(TypeError, + with pytest.raises(ValueError, match=r'category cannot be converted ' r'to timedelta64\[ns\]'): TimedeltaArray(np.array([1, 2, 3], dtype='i8'), dtype='category') - with pytest.raises(TypeError, + with pytest.raises(ValueError, match=r"dtype int64 cannot be converted " r"to timedelta64\[ns\]"): TimedeltaArray(np.array([1, 2, 3], dtype='i8'), @@ -52,7 +52,7 @@ def test_copy(self): class TestTimedeltaArray(object): def test_from_sequence_dtype(self): - msg = r"Only timedelta64\[ns\] dtype is valid" + msg = "dtype .*object.* cannot be converted to timedelta64" with pytest.raises(ValueError, match=msg): TimedeltaArray._from_sequence([], dtype=object) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 2768da0316aad..db84e1ef09845 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -634,6 +634,23 @@ def test_construction_with_nat_and_tzlocal(self): expected = DatetimeIndex([Timestamp('2018', tz=tz), pd.NaT]) tm.assert_index_equal(result, expected) + def test_constructor_no_precision_warns(self): + # GH-24753, GH-24739 + expected = pd.DatetimeIndex(['2000'], dtype='datetime64[ns]') + + # we set the stacklevel for DatetimeIndex + with tm.assert_produces_warning(FutureWarning): + result = pd.DatetimeIndex(['2000'], dtype='datetime64') + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = pd.Index(['2000'], dtype='datetime64') + tm.assert_index_equal(result, expected) + + def test_constructor_wrong_precision_raises(self): + with pytest.raises(ValueError): + pd.DatetimeIndex(['2000'], dtype='datetime64[us]') + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 76f79e86e6f11..3938d6acad2f0 100644 --- 
a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -180,3 +180,20 @@ def test_constructor_name(self): # GH10025 idx2 = TimedeltaIndex(idx, name='something else') assert idx2.name == 'something else' + + def test_constructor_no_precision_warns(self): + # GH-24753, GH-24739 + expected = pd.TimedeltaIndex(['2000'], dtype='timedelta64[ns]') + + # we set the stacklevel for TimedeltaIndex + with tm.assert_produces_warning(FutureWarning): + result = pd.TimedeltaIndex(['2000'], dtype='timedelta64') + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = pd.Index(['2000'], dtype='timedelta64') + tm.assert_index_equal(result, expected) + + def test_constructor_wrong_precision_raises(self): + with pytest.raises(ValueError): + pd.TimedeltaIndex(['2000'], dtype='timedelta64[us]') From 37d67798113bcbddb350871154c2314939f67378 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 16 Jan 2019 11:31:38 -0600 Subject: [PATCH 2/3] Handle object dtype --- pandas/core/indexes/base.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0ac280d8346a6..b0acd5f7a5614 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -311,11 +311,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif (is_timedelta64_dtype(data) or (dtype is not None and is_timedelta64_dtype(dtype))): from pandas import TimedeltaIndex - result = TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, - **kwargs) - if dtype is not None and _o_dtype == dtype: - return Index(result.to_pytimedelta(), dtype=_o_dtype) + if dtype is not None and is_dtype_equal(_o_dtype, dtype): + # Note we can pass copy=False because the .astype below + # will always make a copy + result = TimedeltaIndex(data, copy=False, name=name, **kwargs) + return result.astype(object) 
else: + result = TimedeltaIndex(data, copy=copy, name=name, + dtype=dtype, **kwargs) return result elif is_period_dtype(data) and not is_object_dtype(dtype): From a9a95888948075af4b9d0349ca382f30f291a657 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Jan 2019 07:21:45 -0600 Subject: [PATCH 3/3] updates --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/arrays/datetimes.py | 3 +-- pandas/core/arrays/timedeltas.py | 32 ++++++++++++-------------------- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index c71aa8366676a..ff4aa9968f294 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1260,7 +1260,7 @@ Deprecations - :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`) - Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version. Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`) - ``Series.cat.categorical``, ``Series.cat.name`` and ``Sersies.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`). -- Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`). +- Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`Index`, :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`). .. 
_whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f50b5884a0e05..f2aeb1c1309de 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1987,8 +1987,7 @@ def _validate_dt64_dtype(dtype): """ if dtype is not None: dtype = pandas_dtype(dtype) - - if isinstance(dtype, np.dtype) and dtype == np.dtype("M8"): + if is_dtype_equal(dtype, np.dtype("M8")): # no precision, warn dtype = _NS_DTYPE msg = textwrap.dedent("""\ diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index ec30394fb4872..910cb96a86216 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -16,8 +16,8 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, - is_integer_dtype, is_list_like, is_object_dtype, is_scalar, + _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal, + is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -990,25 +990,17 @@ def objects_to_td64ns(data, unit="ns", errors="raise"): def _validate_td64_dtype(dtype): - try: - if dtype == np.dtype("timedelta64"): - dtype = _TD_DTYPE - msg = textwrap.dedent("""\ - Passing in 'timedelta' dtype with no precision is deprecated - and will raise in a future version. 
Please pass in - 'timedelta64[ns]' instead.""") - warnings.warn(msg, FutureWarning, stacklevel=4) - except TypeError: - # extension dtype - pass - - try: - dtype_mismatch = dtype != _TD_DTYPE - except TypeError: + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("timedelta64")): + dtype = _TD_DTYPE + msg = textwrap.dedent("""\ + Passing in 'timedelta' dtype with no precision is deprecated + and will raise in a future version. Please pass in + 'timedelta64[ns]' instead.""") + warnings.warn(msg, FutureWarning, stacklevel=4) + + if not is_dtype_equal(dtype, _TD_DTYPE): raise ValueError(_BAD_DTYPE.format(dtype=dtype)) - else: - if dtype_mismatch: - raise ValueError(_BAD_DTYPE.format(dtype=dtype)) return dtype