From e2a63c5a40fb0ac60a2c251b530bdd9f782989de Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Jan 2019 15:34:17 -0800 Subject: [PATCH 1/8] 24024 followup, ensure freq validation --- pandas/core/arrays/timedeltas.py | 82 ++++++++------------------ pandas/core/indexes/timedeltas.py | 8 ++- pandas/tests/arrays/test_timedeltas.py | 11 +++- 3 files changed, 40 insertions(+), 61 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1ec37c9f228a6..d876862c9c1c8 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -15,8 +15,8 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, - is_integer_dtype, is_list_like, is_object_dtype, is_scalar, + _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal, + is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -134,55 +134,38 @@ def dtype(self): _attributes = ["freq"] def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): - if isinstance(values, (ABCSeries, ABCIndexClass)): - values = values._values - - if isinstance(values, type(self)): - values, freq, freq_infer = extract_values_freq(values, freq) - - if not isinstance(values, np.ndarray): - msg = ( + if not hasattr(values, "dtype"): + raise ValueError( "Unexpected type '{}'. 'values' must be a TimedeltaArray " "ndarray, or Series or Index containing one of those." - ) - raise ValueError(msg.format(type(values).__name__)) - - if values.dtype == 'i8': - # for compat with datetime/timedelta/period shared methods, - # we can sometimes get here with int64 values. These represent - # nanosecond UTC (or tz-naive) unix timestamps - values = values.view(_TD_DTYPE) - - if values.dtype != _TD_DTYPE: - raise TypeError(_BAD_DTYPE.format(dtype=values.dtype)) - - try: - dtype_mismatch = dtype != _TD_DTYPE - except TypeError: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) - else: - if dtype_mismatch: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) - + .format(type(values).__name__)) if freq == "infer": - msg = ( + raise ValueError( "Frequency inference not allowed in TimedeltaArray.__init__. " - "Use 'pd.array()' instead." - ) - raise ValueError(msg) + "Use 'pd.array()' instead.") - if copy: - values = values.copy() - if freq: - freq = to_offset(freq) + if dtype is not None and not is_dtype_equal(dtype, _TD_DTYPE): + raise TypeError("dtype {dtype} cannot be converted to " + "timedelta64[ns]".format(dtype=dtype)) + + if values.dtype == 'i8': + values = values.view('timedelta64[ns]') - self._data = values - self._dtype = dtype - self._freq = freq + result = type(self)._from_sequence(values, dtype=dtype, + copy=copy, freq=freq) + self._data = result._data + self._freq = result._freq + self._dtype = result._dtype @classmethod def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): - return cls(values, dtype=dtype, freq=freq) + assert isinstance(values, np.ndarray), type(values) + + result = object.__new__(cls) + result._data = values.view(_TD_DTYPE) + result._freq = to_offset(freq) + result._dtype = _TD_DTYPE + return result @classmethod def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False, @@ -998,18 +981,3 @@ def _generate_regular_range(start, end, periods, offset): data = np.arange(b, e, stride, dtype=np.int64) return data - - -def extract_values_freq(arr, freq): - # type: (TimedeltaArray, Offset) -> Tuple[ndarray, Offset, bool] - freq_infer = False - if freq is None: - freq = arr.freq - elif freq and arr.freq: - freq = to_offset(freq) - freq, freq_infer = dtl.validate_inferred_freq( - freq, arr.freq, - freq_infer=False - ) - values = arr._data - return values, freq, freq_infer diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b9d6b8da2cada..d6716d50e8a17 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -233,12 +233,14 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): if not isinstance(values, TimedeltaArray): values = TimedeltaArray._simple_new(values, dtype=dtype, freq=freq) + else: + if freq is None: + freq = values.freq assert isinstance(values, TimedeltaArray), type(values) assert dtype == _TD_DTYPE, dtype assert values.dtype == 'm8[ns]', values.dtype - freq = to_offset(freq) - tdarr = TimedeltaArray._simple_new(values, freq=freq) + tdarr = TimedeltaArray._simple_new(values._data, freq=freq) result = object.__new__(cls) result._data = tdarr result.name = name @@ -310,7 +312,7 @@ def __getitem__(self, key): result = self._data.__getitem__(key) if is_scalar(result): return result - return type(self)(result, name=self.name) + return type(self)(result, freq=result.freq, name=self.name) # ------------------------------------------------------------------- diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 481350640e1a6..55a72e149575e 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -9,6 +9,15 @@ class TestTimedeltaArrayConstructor(object): + def test_freq_validation(self): + # ensure that the public constructor cannot create an invalid instance + arr = np.array([0, 0, 1]) * 3600 * 10**9 + + msg = ("Inferred frequency None from passed values does not " + "conform to passed frequency D") + with pytest.raises(ValueError, match=msg): + TimedeltaArray(arr.view('timedelta64[ns]'), freq="D") + def test_non_array_raises(self): with pytest.raises(ValueError, match='list'): TimedeltaArray([1, 2, 3]) @@ -34,7 +43,7 @@ def test_incorrect_dtype_raises(self): def test_copy(self): data = np.array([1, 2, 3], dtype='m8[ns]') arr = TimedeltaArray(data, copy=False) - assert arr._data is data + assert arr._data.base is data arr = TimedeltaArray(data, copy=True) assert arr._data is not data From 09c2a37ac765bba6fc514d8cdfa644addee4bf4b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Jan 2019 15:39:21 -0800 Subject: [PATCH 2/8] revert unnecessary change --- pandas/core/indexes/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index d6716d50e8a17..893926cc076ab 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -312,7 +312,7 @@ def __getitem__(self, key): result = self._data.__getitem__(key) if is_scalar(result): return result - return type(self)(result, freq=result.freq, name=self.name) + return type(self)(result, name=self.name) # ------------------------------------------------------------------- From 9097bf06534c3da7c0134f357ce50c950531bfbd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Jan 2019 19:03:36 -0800 Subject: [PATCH 3/8] 32bit compat --- pandas/tests/arrays/test_timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 55a72e149575e..af23b2467fcdf 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -11,7 +11,7 @@ class TestTimedeltaArrayConstructor(object): def test_freq_validation(self): # ensure that the public constructor cannot create an invalid instance - arr = np.array([0, 0, 1]) * 3600 * 10**9 + arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10**9 msg = ("Inferred frequency None from passed values does not " "conform to passed frequency D") From eba0c51eb7990356ba32fa3440a801a2fe98e9a7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Jan 2019 08:33:53 -0800 Subject: [PATCH 4/8] faster dtype checks --- pandas/core/arrays/timedeltas.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d876862c9c1c8..578dfd076b412 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -159,6 +159,7 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): @classmethod def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): + assert dtype is _TD_DTYPE, dtype assert isinstance(values, np.ndarray), type(values) result = object.__new__(cls) @@ -843,17 +844,17 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): data = data._data # Convert whatever we have into timedelta64[ns] dtype - if is_object_dtype(data) or is_string_dtype(data): + if data.dtype.kind in ['S', 'O']: # no need to make a copy, need to convert if string-dtyped data = objects_to_td64ns(data, unit=unit, errors=errors) copy = False - elif is_integer_dtype(data): + elif data.dtype.kind == 'i': # treat as multiples of the given unit data, copy_made = ints_to_td64ns(data, unit=unit) copy = copy and not copy_made - elif is_float_dtype(data): + elif data.dtype.kind == 'f': # treat as multiples of the given unit. If after converting to nanos, # there are fractional components left, these are truncated # (i.e. NOT rounded) @@ -863,7 +864,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): data[mask] = iNaT copy = False - elif is_timedelta64_dtype(data): + elif data.dtype.kind == 'm': if data.dtype != _TD_DTYPE: # non-nano unit # TODO: watch out for overflows From ab0c928996de09a03efb7d0ad4147438a0a701ca Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Jan 2019 09:17:40 -0800 Subject: [PATCH 5/8] remove unused import --- pandas/core/arrays/timedeltas.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 578dfd076b412..89d956a68bf9e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -17,8 +17,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, - is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, - pandas_dtype) + is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) From 37352bb9db2055a67763c5f9e243d0ec3813651b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Jan 2019 09:18:20 -0800 Subject: [PATCH 6/8] py3 compat --- pandas/core/arrays/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 89d956a68bf9e..8164613b23ea4 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -843,7 +843,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): data = data._data # Convert whatever we have into timedelta64[ns] dtype - if data.dtype.kind in ['S', 'O']: + if data.dtype.kind in ['S', 'U', 'O']: # no need to make a copy, need to convert if string-dtyped data = objects_to_td64ns(data, unit=unit, errors=errors) copy = False From f273a2c144f34ac9afc1b684ca9f2024ecf3c186 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Jan 2019 09:22:03 -0800 Subject: [PATCH 7/8] assert equality instead of identity --- pandas/core/arrays/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 8164613b23ea4..dc6562d64dcfa 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -158,7 +158,7 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): @classmethod def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): - assert dtype is _TD_DTYPE, dtype + assert dtype == _TD_DTYPE, dtype assert isinstance(values, np.ndarray), type(values) result = object.__new__(cls) From 02acf9d9ebef686e965744cbe2d453aa10962d90 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 9 Jan 2019 06:47:39 -0800 Subject: [PATCH 8/8] revert dtype checks --- pandas/core/arrays/timedeltas.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index dc6562d64dcfa..47b3f93f88b78 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -17,7 +17,8 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, - is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) + is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, + pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) @@ -843,17 +844,17 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): data = data._data # Convert whatever we have into timedelta64[ns] dtype - if data.dtype.kind in ['S', 'U', 'O']: + if is_object_dtype(data.dtype) or is_string_dtype(data.dtype): # no need to make a copy, need to convert if string-dtyped data = objects_to_td64ns(data, unit=unit, errors=errors) copy = False - elif data.dtype.kind == 'i': + elif is_integer_dtype(data.dtype): # treat as multiples of the given unit data, copy_made = ints_to_td64ns(data, unit=unit) copy = copy and not copy_made - elif data.dtype.kind == 'f': + elif is_float_dtype(data.dtype): # treat as multiples of the given unit. If after converting to nanos, # there are fractional components left, these are truncated # (i.e. NOT rounded) @@ -863,7 +864,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): data[mask] = iNaT copy = False - elif data.dtype.kind == 'm': + elif is_timedelta64_dtype(data.dtype): if data.dtype != _TD_DTYPE: # non-nano unit # TODO: watch out for overflows