From 94c3eb9c54c804791baa4fd36d5dd9f25308362e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 16:13:28 -0700 Subject: [PATCH 1/2] Avoid try/except in blocks, fix setitem bug in datetimelike EA --- pandas/core/arrays/datetimelike.py | 2 ++ pandas/core/internals/blocks.py | 32 +++++++++++++++----------- pandas/tests/arrays/test_datetimes.py | 13 +++++++++++ pandas/tests/arrays/test_timedeltas.py | 13 +++++++++++ pandas/tests/test_base.py | 2 +- 5 files changed, 47 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f86b307e5ede3..2206fd3316685 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -473,6 +473,8 @@ def __setitem__( # to a period in from_sequence). For DatetimeArray, it's Timestamp... # I don't know if mypy can do that, possibly with Generics. # https://mypy.readthedocs.io/en/latest/generics.html + if lib.is_scalar(value) and not isna(value): + value = com.maybe_box_datetimelike(value) if is_list_like(value): is_slice = isinstance(key, slice) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6d70fcfb62d52..563faab98d68b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2230,7 +2230,9 @@ def _can_hold_element(self, element): if tipo is not None: if self.is_datetimetz: # require exact match, since non-nano does not exist - return is_dtype_equal(tipo, self.dtype) + return is_dtype_equal(tipo, self.dtype) or is_valid_nat_for_dtype( + element, self.dtype + ) # GH#27419 if we get a non-nano datetime64 object return is_datetime64_dtype(tipo) @@ -2500,26 +2502,28 @@ def concat_same_type(self, to_concat, placement=None): def fillna(self, value, limit=None, inplace=False, downcast=None): # We support filling a DatetimeTZ with a `value` whose timezone # is different by coercing to object. - try: + if self._can_hold_element(value): return super().fillna(value, limit, inplace, downcast) - except (ValueError, TypeError): - # different timezones, or a non-tz - return self.astype(object).fillna( - value, limit=limit, inplace=inplace, downcast=downcast - ) + + # different timezones, or a non-tz + return self.astype(object).fillna( + value, limit=limit, inplace=inplace, downcast=downcast + ) def setitem(self, indexer, value): # https://github.com/pandas-dev/pandas/issues/24020 # Need a dedicated setitem until #24020 (type promotion in setitem # for extension arrays) is designed and implemented. - try: + if self._can_hold_element(value) or ( + isinstance(indexer, np.ndarray) and indexer.size == 0 + ): return super().setitem(indexer, value) - except (ValueError, TypeError): - obj_vals = self.values.astype(object) - newb = make_block( - obj_vals, placement=self.mgr_locs, klass=ObjectBlock, ndim=self.ndim - ) - return newb.setitem(indexer, value) + + obj_vals = self.values.astype(object) + newb = make_block( + obj_vals, placement=self.mgr_locs, klass=ObjectBlock, ndim=self.ndim + ) + return newb.setitem(indexer, value) def equals(self, other): # override for significant performance improvement diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 58c2f3fc65bb2..49fc8da0a027a 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -179,6 +179,19 @@ def test_setitem_clears_freq(self): a[0] = pd.Timestamp("2000", tz="US/Central") assert a.freq is None + @pytest.mark.parametrize("obj", [ + pd.Timestamp.now(), + pd.Timestamp.now().to_datetime64(), + pd.Timestamp.now().to_pydatetime(), + ]) + def test_setitem_objects(self, obj): + # make sure we accept datetime64 and datetime in addition to Timestamp + dti = pd.date_range("2000", periods=2, freq="D") + arr = dti._data + + arr[0] = obj + assert arr[0] == obj + def test_repeat_preserves_tz(self): dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") arr = DatetimeArray(dti) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 5825f9f150eb8..d3e57737b30ea 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -125,6 +125,19 @@ def test_setitem_clears_freq(self): a[0] = pd.Timedelta("1H") assert a.freq is None + @pytest.mark.parametrize("obj", [ + pd.Timedelta(seconds=1), + pd.Timedelta(seconds=1).to_timedelta64(), + pd.Timedelta(seconds=1).to_pytimedelta() + ]) + def test_setitem_objects(self, obj): + # make sure we accept timedelta64 and timedelta in addition to Timedelta + tdi = pd.timedelta_range("2 Days", periods=4, freq="H") + arr = TimedeltaArray(tdi, freq=tdi.freq) + + arr[0] = obj + assert arr[0] == pd.Timedelta(seconds=1) + class TestReductions: def test_min_max(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index d75016824d6cf..c760c75e44f6b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -418,7 +418,7 @@ def test_value_counts_unique_nunique_null(self, null_obj): values = o._shallow_copy(v) else: o = o.copy() - o[0:2] = iNaT + o[0:2] = pd.NaT values = o._values elif needs_i8_conversion(o): From 043093959e5cbc3b638001953de85ad374306d72 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 16:41:18 -0700 Subject: [PATCH 2/2] blackify --- pandas/tests/arrays/test_datetimes.py | 13 ++++++++----- pandas/tests/arrays/test_timedeltas.py | 13 ++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 49fc8da0a027a..d749d9bb47d25 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -179,11 +179,14 @@ def test_setitem_clears_freq(self): a[0] = pd.Timestamp("2000", tz="US/Central") assert a.freq is None - @pytest.mark.parametrize("obj", [ - pd.Timestamp.now(), - pd.Timestamp.now().to_datetime64(), - pd.Timestamp.now().to_pydatetime(), - ]) + @pytest.mark.parametrize( + "obj", + [ + pd.Timestamp.now(), + pd.Timestamp.now().to_datetime64(), + pd.Timestamp.now().to_pydatetime(), + ], + ) def test_setitem_objects(self, obj): # make sure we accept datetime64 and datetime in addition to Timestamp dti = pd.date_range("2000", periods=2, freq="D") diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index d3e57737b30ea..540c3343b2a1b 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -125,11 +125,14 @@ def test_setitem_clears_freq(self): a[0] = pd.Timedelta("1H") assert a.freq is None - @pytest.mark.parametrize("obj", [ - pd.Timedelta(seconds=1), - pd.Timedelta(seconds=1).to_timedelta64(), - pd.Timedelta(seconds=1).to_pytimedelta() - ]) + @pytest.mark.parametrize( + "obj", + [ + pd.Timedelta(seconds=1), + pd.Timedelta(seconds=1).to_timedelta64(), + pd.Timedelta(seconds=1).to_pytimedelta(), + ], + ) def test_setitem_objects(self, obj): # make sure we accept timedelta64 and timedelta in addition to Timedelta tdi = pd.timedelta_range("2 Days", periods=4, freq="H")