From 1043a205d197739500d6c75a9dbc40abf713837e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 15 Jul 2019 18:36:16 -0700 Subject: [PATCH 01/16] stop conflating iNaT with td64-NaT --- pandas/core/internals/blocks.py | 21 +++++++-------------- pandas/core/nanops.py | 8 ++++++++ pandas/tests/series/test_missing.py | 7 +++++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 897a82f9a1968..c0d3368c652ec 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2597,6 +2597,7 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): is_timedelta = True _can_hold_na = True is_numeric = False + fill_value = np.timedelta64("NaT", "ns") def __init__(self, values, placement, ndim=None): if values.dtype != _TD_DTYPE: @@ -2617,15 +2618,11 @@ def _box_func(self): def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: - # TODO: remove the np.int64 support once coerce_values and - # _try_coerce_args both coerce to m8[ns] and not i8. - return issubclass(tipo.type, (np.timedelta64, np.int64)) + return issubclass(tipo.type, np.timedelta64) elif element is NaT: return True elif isinstance(element, (timedelta, np.timedelta64)): return True - elif is_integer(element): - return element == tslibs.iNaT return is_valid_nat_for_dtype(element, self.dtype) def fillna(self, value, **kwargs): @@ -2645,9 +2642,6 @@ def fillna(self, value, **kwargs): value = Timedelta(value, unit="s") return super().fillna(value, **kwargs) - def _coerce_values(self, values): - return values.view("i8") - def _try_coerce_args(self, other): """ Coerce values and other to int64, with null values converted to @@ -2663,13 +2657,12 @@ def _try_coerce_args(self, other): """ if is_valid_nat_for_dtype(other, self.dtype): - other = tslibs.iNaT - elif is_integer(other) and other == tslibs.iNaT: - pass + other = np.timedelta64("NaT", "ns") elif isinstance(other, (timedelta, np.timedelta64)): - other = Timedelta(other).value + other = Timedelta(other).to_timedelta64() elif hasattr(other, "dtype") and is_timedelta64_dtype(other): - other = other.astype("i8", copy=False).view("i8") + # TODO: can we get here with non-nano dtype? + pass else: # coercion issues # let higher levels handle @@ -2683,7 +2676,7 @@ def _try_coerce_result(self, result): mask = isna(result) if result.dtype.kind in ["i", "f"]: result = result.astype("m8[ns]") - result[mask] = tslibs.iNaT + result[mask] = np.timedelta64("NaT", "ns") elif isinstance(result, (np.integer, np.float)): result = self._box_func(result) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ce14cb22a88ce..aa255d03f9db7 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1362,6 +1362,14 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation): quantiles : scalar or array """ # mask is Union[ExtensionArray, ndarray] + if values.dtype.kind == "m": + # need to cast to integer to avoid rounding errors in numpy + result = _nanpercentile_1d(values.view("i8"), mask, q, na_value, interpolation) + + # Note: we have to do do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + values = values[~mask] if len(values) == 0: diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index f8a44b7f5639e..adb23fc6b94ea 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -780,9 +780,11 @@ def test_timedelta64_nan(self): td1[0] = td[0] assert not isna(td1[0]) + # GH#16674 iNaT is treated as an integer when given by the user td1[1] = iNaT - assert isna(td1[1]) - assert td1[1].value == iNaT + assert not isna(td1[1]) + assert td1.dtype == np.object_ + assert td1[1] == iNaT td1[1] = td[1] assert not isna(td1[1]) @@ -792,6 +794,7 @@ def test_timedelta64_nan(self): td1[2] = td[2] assert not isna(td1[2]) + # FIXME: don't leave commented-out # boolean setting # this doesn't work, not sure numpy even supports it # result = td[(td>np.timedelta64(timedelta(days=3))) & From f35754c12cba5245d939bf633623571535763d49 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 16 Jul 2019 17:01:12 -0700 Subject: [PATCH 02/16] dont allow iNaT in DatetimeBlock --- pandas/core/internals/blocks.py | 31 ++++++++---------------- pandas/core/nanops.py | 18 ++++++++------ pandas/tests/frame/test_indexing.py | 7 ++++-- pandas/tests/internals/test_internals.py | 2 +- 4 files changed, 26 insertions(+), 32 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c0d3368c652ec..722acbcbb5b59 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2180,7 +2180,7 @@ def _holder(self): @property def fill_value(self): - return tslibs.iNaT + return np.datetime64("NaT", "ns") def get_values(self, dtype=None): """ @@ -2257,14 +2257,9 @@ def _can_hold_element(self, element): if self.is_datetimetz: return tz_compare(element.tzinfo, self.dtype.tz) return element.tzinfo is None - elif is_integer(element): - return element == tslibs.iNaT return is_valid_nat_for_dtype(element, self.dtype) - def _coerce_values(self, values): - return values.view("i8") - def _try_coerce_args(self, other): """ Coerce other to dtype 'i8'. NaN and NaT convert to @@ -2281,16 +2276,15 @@ def _try_coerce_args(self, other): base-type other """ if is_valid_nat_for_dtype(other, self.dtype): - other = tslibs.iNaT - elif is_integer(other) and other == tslibs.iNaT: - pass + other = np.datetime64("NaT", "ns") elif isinstance(other, (datetime, np.datetime64, date)): other = self._box_func(other) if getattr(other, "tz") is not None: raise TypeError("cannot coerce a Timestamp with a tz on a naive Block") - other = other.asm8.view("i8") + other = other.asm8 elif hasattr(other, "dtype") and is_datetime64_dtype(other): - other = other.astype("i8", copy=False).view("i8") + # TODO: can we get here with non-nano? + pass else: # coercion issues # let higher levels handle @@ -2449,8 +2443,7 @@ def _slice(self, slicer): return self.values[slicer] def _coerce_values(self, values): - # asi8 is a view, needs copy - return _block_shape(values.view("i8"), ndim=self.ndim) + return _block_shape(values, ndim=self.ndim) def _try_coerce_args(self, other): """ @@ -2475,21 +2468,17 @@ def _try_coerce_args(self, other): other = self._holder(other, dtype=self.dtype) elif is_valid_nat_for_dtype(other, self.dtype): - other = tslibs.iNaT - elif is_integer(other) and other == tslibs.iNaT: - pass + other = np.datetime64("NaT", "ns") elif isinstance(other, self._holder): - if other.tz != self.values.tz: + if not tz_compare(other.tz, self.values.tz): raise ValueError("incompatible or non tz-aware value") - other = _block_shape(other.asi8, ndim=self.ndim) + elif isinstance(other, (np.datetime64, datetime, date)): other = tslibs.Timestamp(other) - tz = getattr(other, "tz", None) # test we can have an equal time zone - if tz is None or str(tz) != str(self.values.tz): + if not tz_compare(other.tz, self.values.tz): raise ValueError("incompatible or non tz-aware value") - other = other.value else: raise TypeError(other) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index aa255d03f9db7..507bfa2345e04 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1362,14 +1362,6 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation): quantiles : scalar or array """ # mask is Union[ExtensionArray, ndarray] - if values.dtype.kind == "m": - # need to cast to integer to avoid rounding errors in numpy - result = _nanpercentile_1d(values.view("i8"), mask, q, na_value, interpolation) - - # Note: we have to do do `astype` and not view because in general we - # have float result at this point, not i8 - return result.astype(values.dtype) - values = values[~mask] if len(values) == 0: @@ -1401,6 +1393,16 @@ def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): ------- quantiles : scalar or array """ + if values.dtype.kind in ["m", "M"]: + # need to cast to integer to avoid rounding errors in numpy + result = nanpercentile( + values.view("i8"), q, axis, na_value.view("i8"), mask, ndim, interpolation + ) + + # Note: we have to do do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + if not lib.is_scalar(mask) and mask.any(): if ndim == 1: return _nanpercentile_1d( diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 0cb7db0e47123..756a6159fc7c5 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1150,6 +1150,7 @@ def test_fancy_index_int_labels_exceptions(self, float_frame): with pytest.raises(KeyError, match=msg): float_frame.ix[:, ["E"]] = 1 + # FIXME: don't leave commented-out # partial setting now allows this GH2578 # pytest.raises(KeyError, float_frame.ix.__setitem__, # (slice(None, None), 'E'), 1) @@ -1676,9 +1677,11 @@ def test_setitem_single_column_mixed_datetime(self): ) assert_series_equal(result, expected) - # set an allowable datetime64 type + # GH#16674 iNaT is treated as an integer when given by the user df.loc["b", "timestamp"] = iNaT - assert isna(df.loc["b", "timestamp"]) + assert not isna(df.loc["b", "timestamp"]) + assert df["timestamp"].dtype == np.object_ + assert df.loc["b", "timestamp"] == iNaT # allow this syntax df.loc["c", "timestamp"] = np.nan diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 655e484bc34d1..ae572ae1df8a2 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -338,7 +338,7 @@ def test_try_coerce_arg(self): vals = (np.datetime64("2010-10-10"), datetime(2010, 10, 10), date(2010, 10, 10)) for val in vals: coerced = block._try_coerce_args(val) - assert np.int64 == type(coerced) + assert np.datetime64 == type(coerced) assert pd.Timestamp("2010-10-10") == pd.Timestamp(coerced) From 0a4ed9c24d28b6d28c42f8349f1cea325083b9b3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 16 Jul 2019 17:03:05 -0700 Subject: [PATCH 03/16] fix docstring --- pandas/core/internals/blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 722acbcbb5b59..d1b6f0f44bc4b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2633,8 +2633,8 @@ def fillna(self, value, **kwargs): def _try_coerce_args(self, other): """ - Coerce values and other to int64, with null values converted to - iNaT. values is always ndarray-like, other may not be + Coerce values and other to datetime64[ns], with null values + converted to datetime64("NaT", "ns"). Parameters ---------- From 1fb34fddd3e148e52553b4a3ea3c4fc7bb3bc12e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 16 Jul 2019 21:04:41 -0700 Subject: [PATCH 04/16] coerce less --- pandas/core/groupby/generic.py | 18 +++++----- pandas/core/groupby/groupby.py | 7 +++- pandas/core/internals/blocks.py | 60 +++++++++++---------------------- 3 files changed, 36 insertions(+), 49 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7fd0ca94e7997..da595222331d3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -143,8 +143,10 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): new_blocks = [] new_items = [] deleted_items = [] + no_result = object() for block in data.blocks: - + # Avoid inheriting result from earlier in the loop + result = no_result locs = block.mgr_locs.as_array try: result, _ = self.grouper.aggregate( @@ -171,15 +173,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): except TypeError: # we may have an exception in trying to aggregate # continue and exclude the block - pass - + deleted_items.append(locs) + continue finally: + if result is not no_result: + dtype = block.values.dtype - dtype = block.values.dtype - - # see if we can cast the block back to the original dtype - result = block._try_coerce_and_cast_result(result, dtype=dtype) - newb = block.make_block(result) + # see if we can cast the block back to the original dtype + result = block._try_coerce_and_cast_result(result, dtype=dtype) + newb = block.make_block(result) new_items.append(locs) new_blocks.append(newb) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9aba9723e0546..44d03a386e90a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -34,6 +34,7 @@ class providing the base-class of operations. is_numeric_dtype, is_scalar, ) +from pandas.core.dtypes.generic import ABCDatetimeArray from pandas.core.dtypes.missing import isna, notna from pandas.api.types import is_datetime64_dtype, is_integer_dtype, is_object_dtype @@ -803,8 +804,12 @@ def _try_cast(self, result, obj, numeric_only=False): # Prior results _may_ have been generated in UTC. # Ensure we localize to UTC first before converting # to the target timezone + if isinstance(obj, ABCDatetimeArray): + arr = obj + else: + arr = obj._values try: - result = obj._values._from_sequence( + result = arr._from_sequence( result, dtype="datetime64[ns, UTC]" ) result = result.astype(dtype) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d1b6f0f44bc4b..e8675ba1d7c1b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -719,7 +719,7 @@ def _coerce_values(self, values): ------- ndarray or ExtensionArray """ - return values + return _block_shape(values, ndim=self.ndim) def _try_coerce_args(self, other): """ provide coercion to our input arguments """ @@ -1527,7 +1527,7 @@ def quantile(self, qs, interpolation="linear", axis=0): # We need to operate on i8 values for datetimetz # but `Block.get_values()` returns an ndarray of objects # right now. We need an API for "values to do numeric-like ops on" - values = self.values.asi8 + values = self.values.view("M8[ns]") # TODO: NonConsolidatableMixin shape # Usual shape inconsistencies for ExtensionBlocks @@ -1898,12 +1898,6 @@ def _try_cast_result(self, result, dtype=None): result could also be an EA Array itself, in which case it is already a 1-D array """ - try: - - result = self._holder._from_sequence(result.ravel(), dtype=dtype) - except Exception: - pass - return result def formatting_values(self): @@ -2294,12 +2288,9 @@ def _try_coerce_args(self, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ - if isinstance(result, np.ndarray): - if result.dtype.kind in ["i", "f"]: - result = result.astype("M8[ns]") - - elif isinstance(result, (np.integer, np.float, np.datetime64)): - result = self._box_func(result) + if isinstance(result, np.ndarray) and result.dtype.kind == "i": + # needed for _interpolate_with_ffill + result = result.view("M8[ns]") return result @property @@ -2361,6 +2352,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): is_extension = True _can_hold_element = DatetimeBlock._can_hold_element + fill_value = np.datetime64("NaT", "ns") @property def _holder(self): @@ -2442,9 +2434,6 @@ def _slice(self, slicer): return self.values[loc] return self.values[slicer] - def _coerce_values(self, values): - return _block_shape(values, ndim=self.ndim) - def _try_coerce_args(self, other): """ localize and return i8 for the values @@ -2487,22 +2476,22 @@ def _try_coerce_args(self, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ["i", "f"]: - result = result.astype("M8[ns]") + if result.ndim == 2: + result = result[0, :] + if result.dtype == np.float64: + # needed for post-groupby.median + result = self._holder._from_sequence( + result.astype(np.int64), freq=None, dtype=self.values.dtype + ) + elif result.dtype == "M8[ns]": + # otherwise we get here via quantile and already have M8[ns] + result = self._holder._simple_new( + result, freq=None, dtype=self.values.dtype + ) - elif isinstance(result, (np.integer, np.float, np.datetime64)): + elif isinstance(result, np.datetime64): + # also for post-quantile result = self._box_func(result) - - if isinstance(result, np.ndarray): - # allow passing of > 1dim if its trivial - - if result.ndim > 1: - result = result.reshape(np.prod(result.shape)) - # GH#24096 new values invalidates a frequency - result = self._holder._simple_new( - result, freq=None, dtype=self.values.dtype - ) - return result @property @@ -2661,15 +2650,6 @@ def _try_coerce_args(self, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ - if isinstance(result, np.ndarray): - mask = isna(result) - if result.dtype.kind in ["i", "f"]: - result = result.astype("m8[ns]") - result[mask] = np.timedelta64("NaT", "ns") - - elif isinstance(result, (np.integer, np.float)): - result = self._box_func(result) - return result def should_store(self, value): From be2e42a09b0fd65de4755a924964d2e2e2d9b46c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 16 Jul 2019 21:20:08 -0700 Subject: [PATCH 05/16] cleanup --- pandas/core/internals/blocks.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e8675ba1d7c1b..7b76b81c81fe4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import NaT, lib, tslib, tslibs +from pandas._libs import NaT, lib, tslib, tslibs, Timestamp import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare @@ -2272,8 +2272,8 @@ def _try_coerce_args(self, other): if is_valid_nat_for_dtype(other, self.dtype): other = np.datetime64("NaT", "ns") elif isinstance(other, (datetime, np.datetime64, date)): - other = self._box_func(other) - if getattr(other, "tz") is not None: + other = Timestamp(other) + if other.tz is not None: raise TypeError("cannot coerce a Timestamp with a tz on a naive Block") other = other.asm8 elif hasattr(other, "dtype") and is_datetime64_dtype(other): @@ -2293,10 +2293,6 @@ def _try_coerce_result(self, result): result = result.view("M8[ns]") return result - @property - def _box_func(self): - return tslibs.Timestamp - def to_native_types( self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs ): @@ -2450,9 +2446,7 @@ def _try_coerce_args(self, other): if isinstance(other, ABCSeries): other = self._holder(other) - if isinstance(other, bool): - raise TypeError - elif is_datetime64_dtype(other): + if is_datetime64_dtype(other): # add the tz back other = self._holder(other, dtype=self.dtype) @@ -2589,10 +2583,6 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return TimedeltaArray - @property - def _box_func(self): - return lambda x: Timedelta(x, unit="ns") - def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: From 1ae64ac396c93bb6ae199297c9179567ca906e34 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 16 Jul 2019 21:41:08 -0700 Subject: [PATCH 06/16] remove unnecessary --- pandas/core/internals/blocks.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7b76b81c81fe4..7277ea1a4fb79 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -391,8 +391,9 @@ def apply(self, func, **kwargs): with np.errstate(all="ignore"): result = func(self.values, **kwargs) if not isinstance(result, Block): + # TODO: do we need this condition? It could only fail if the user + # passed a `func` that itself returned a Block result = self.make_block(values=_block_shape(result, ndim=self.ndim)) - return result def fillna(self, value, limit=None, inplace=False, downcast=None): @@ -2442,15 +2443,7 @@ def _try_coerce_args(self, other): ------- base-type other """ - - if isinstance(other, ABCSeries): - other = self._holder(other) - - if is_datetime64_dtype(other): - # add the tz back - other = self._holder(other, dtype=self.dtype) - - elif is_valid_nat_for_dtype(other, self.dtype): + if is_valid_nat_for_dtype(other, self.dtype): other = np.datetime64("NaT", "ns") elif isinstance(other, self._holder): if not tz_compare(other.tz, self.values.tz): From 512855a6e3393a72187ba0c7d41a9698209972fb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 17 Jul 2019 08:42:57 -0700 Subject: [PATCH 07/16] remove _coerce_values --- pandas/core/internals/blocks.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7277ea1a4fb79..7467cada0919c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -708,20 +708,6 @@ def _try_cast_result(self, result, dtype=None): # may need to change the dtype here return maybe_downcast_to_dtype(result, dtype) - def _coerce_values(self, values): - """ - Coerce values (usually derived from self.values) for an operation. - - Parameters - ---------- - values : ndarray or ExtensionArray - - Returns - ------- - ndarray or ExtensionArray - """ - return _block_shape(values, ndim=self.ndim) - def _try_coerce_args(self, other): """ provide coercion to our input arguments """ @@ -784,7 +770,7 @@ def replace( # try to replace, if we raise an error, convert to ObjectBlock and # retry - values = self._coerce_values(self.values) + values = self.values try: to_replace = self._try_coerce_args(to_replace) except (TypeError, ValueError): @@ -891,7 +877,6 @@ def setitem(self, indexer, value): b = self.astype(dtype) return b.setitem(indexer, value) else: - values = self._coerce_values(values) # can keep its own dtype if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype): dtype = self.dtype @@ -1216,7 +1201,6 @@ def _interpolate_with_fill( return [self.copy()] values = self.values if inplace else self.values.copy() - values = self._coerce_values(values) fill_value = self._try_coerce_args(fill_value) values = missing.interpolate_2d( values, @@ -1424,7 +1408,6 @@ def func(cond, values, other): else: # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) - values = self._coerce_values(values) try: result = func(cond, values, other) except TypeError: @@ -1535,7 +1518,6 @@ def quantile(self, qs, interpolation="linear", axis=0): values = values[None, :] else: values = self.get_values() - values = self._coerce_values(values) is_empty = values.shape[axis] == 0 orig_scalar = not is_list_like(qs) @@ -1700,7 +1682,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) # use block's copy logic. # .values may be an Index which does shallow copy by default new_values = self.values if inplace else self.copy().values - new_values = self._coerce_values(new_values) new = self._try_coerce_args(new) if isinstance(new, np.ndarray) and len(new) == len(mask): @@ -2405,7 +2386,7 @@ def get_values(self, dtype=None): """ values = self.values if is_object_dtype(dtype): - values = values._box_values(values._data) + values = values.astype(object) values = np.asarray(values) From 0b8bdba7be9fa799789904461f60e11d690754fc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 17 Jul 2019 10:20:27 -0700 Subject: [PATCH 08/16] comments --- pandas/core/internals/blocks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fa18c73e93e65..b57450e585b9f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1825,6 +1825,7 @@ def get_values(self, dtype=None): values = values.reshape((1,) + values.shape) return values + # TODO: should this reshape? def to_dense(self): return np.asarray(self.values) @@ -2402,6 +2403,7 @@ def get_values(self, dtype=None): values = values.reshape(1, -1) return values + # TODO: should this reshape? def to_dense(self): # we request M8[ns] dtype here, even though it discards tzinfo, # as lots of code (e.g. anything using values_from_object) From 6500ed8d1789a900542056cc50d4943c291bcca7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 23 Jul 2019 17:30:32 -0700 Subject: [PATCH 09/16] Cleanup --- pandas/core/internals/blocks.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 85f9f51ccb48b..9114ac9ac24a8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -390,9 +390,8 @@ def apply(self, func, **kwargs): with np.errstate(all="ignore"): result = func(self.values, **kwargs) if not isinstance(result, Block): - # TODO: do we need this condition? It could only fail if the user - # passed a `func` that itself returned a Block result = self.make_block(values=_block_shape(result, ndim=self.ndim)) + return result def fillna(self, value, limit=None, inplace=False, downcast=None): @@ -1829,7 +1828,6 @@ def get_values(self, dtype=None): values = values.reshape((1,) + values.shape) return values - # TODO: should this reshape? def to_dense(self): return np.asarray(self.values) @@ -2407,7 +2405,6 @@ def get_values(self, dtype=None): values = values.reshape(1, -1) return values - # TODO: should this reshape? def to_dense(self): # we request M8[ns] dtype here, even though it discards tzinfo, # as lots of code (e.g. anything using values_from_object) From 79128ede808fabd71c7f1c6017d6376c0f9076aa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 23 Jul 2019 18:31:54 -0700 Subject: [PATCH 10/16] use extract_array --- pandas/core/groupby/groupby.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 44d03a386e90a..70fc157a9dd5b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -34,7 +34,6 @@ class providing the base-class of operations. is_numeric_dtype, is_scalar, ) -from pandas.core.dtypes.generic import ABCDatetimeArray from pandas.core.dtypes.missing import isna, notna from pandas.api.types import is_datetime64_dtype, is_integer_dtype, is_object_dtype @@ -52,6 +51,7 @@ class providing the base-class of operations. from pandas.core.generic import NDFrame from pandas.core.groupby import base from pandas.core.index import CategoricalIndex, Index, MultiIndex +from pandas.core.internals.arrays import extract_array from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter @@ -804,10 +804,7 @@ def _try_cast(self, result, obj, numeric_only=False): # Prior results _may_ have been generated in UTC. # Ensure we localize to UTC first before converting # to the target timezone - if isinstance(obj, ABCDatetimeArray): - arr = obj - else: - arr = obj._values + arr = extract_array(obj) try: result = arr._from_sequence( result, dtype="datetime64[ns, UTC]" From 5d73147eb958dbea4059583f64467e83c420339b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 09:29:47 -0700 Subject: [PATCH 11/16] blackify --- pandas/core/groupby/groupby.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 70fc157a9dd5b..d4fdaa13ef712 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -806,9 +806,7 @@ def _try_cast(self, result, obj, numeric_only=False): # to the target timezone arr = extract_array(obj) try: - result = arr._from_sequence( - result, dtype="datetime64[ns, UTC]" - ) + result = arr._from_sequence(result, dtype="datetime64[ns, UTC]") result = result.astype(dtype) except TypeError: # _try_cast was called at a point where the result From 6ed302ceff777986f4f6e8403a7743dff4282f20 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 09:30:03 -0700 Subject: [PATCH 12/16] isort --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9114ac9ac24a8..87dd04acf7e3f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import NaT, lib, tslib, tslibs, Timestamp +from pandas._libs import NaT, Timestamp, lib, tslib, tslibs import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare From df6ac0ab73368b422a5216b791d9df596a836157 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 25 Jul 2019 13:23:35 -0700 Subject: [PATCH 13/16] update import --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d4fdaa13ef712..0dff6b4f7fd99 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -47,11 +47,11 @@ class providing the base-class of operations. SpecificationError, ) import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base from pandas.core.index import CategoricalIndex, Index, MultiIndex -from pandas.core.internals.arrays import extract_array from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter From 58489379d8a4d1d3862d877954b6d8f43b20925e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 25 Jul 2019 15:43:25 -0700 Subject: [PATCH 14/16] Fix incorrect test --- pandas/tests/indexing/test_datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 31e9cff68445e..fb8f62d7a06c5 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -51,7 +51,7 @@ def test_indexing_with_datetime_tz(self): # indexing result = df.iloc[1] expected = Series( - [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan], + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1, @@ -59,7 +59,7 @@ def test_indexing_with_datetime_tz(self): tm.assert_series_equal(result, expected) result = df.loc[1] expected = Series( - [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan], + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1, From 53585b2d8a59aee78980466d05fa8c21d901e41a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 09:17:46 -0700 Subject: [PATCH 15/16] comment --- pandas/core/internals/blocks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 80a5288e78be6..ef12f3a96ac96 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2471,6 +2471,7 @@ def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): if result.ndim == 2: + # kludge for 2D blocks with 1D EAs result = result[0, :] if result.dtype == np.float64: # needed for post-groupby.median From ba043bbef3e20a8f4ea021e1f60cd6fa6052d6b1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 09:55:54 -0700 Subject: [PATCH 16/16] post merge fix --- pandas/core/internals/blocks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ef12f3a96ac96..4ca867b1088e7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -868,7 +868,6 @@ def setitem(self, indexer, value): if self._can_hold_element(value): value = self._try_coerce_args(value) - values = self._coerce_values(values) # can keep its own dtype if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype): dtype = self.dtype