From c5f3a36f40120562d24f1ac2c83ee074f4aa432d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 27 Jul 2019 12:42:00 -0700 Subject: [PATCH 1/8] de-kludge quantile, make interpolate_with_fill understand datetime64 --- pandas/core/internals/blocks.py | 40 +++++++++++-------------------- pandas/core/internals/managers.py | 2 +- pandas/core/missing.py | 15 +++++++++++- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4ca867b1088e7..09729de9501b0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1223,7 +1223,6 @@ def _interpolate_with_fill( fill_value=fill_value, dtype=self.dtype, ) - values = self._try_coerce_result(values) blocks = [self.make_block_same_class(values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) @@ -1526,18 +1525,7 @@ def quantile(self, qs, interpolation="linear", axis=0): # We should always have ndim == 2 becase Series dispatches to DataFrame assert self.ndim == 2 - if self.is_datetimetz: - # TODO: cleanup this special case. - # We need to operate on i8 values for datetimetz - # but `Block.get_values()` returns an ndarray of objects - # right now. We need an API for "values to do numeric-like ops on" - values = self.values.view("M8[ns]") - - # TODO: NonConsolidatableMixin shape - # Usual shape inconsistencies for ExtensionBlocks - values = values[None, :] - else: - values = self.get_values() + values = self.get_values() is_empty = values.shape[axis] == 0 orig_scalar = not is_list_like(qs) @@ -1576,7 +1564,6 @@ def quantile(self, qs, interpolation="linear", axis=0): result = lib.item_from_zerodim(result) ndim = getattr(result, "ndim", None) or 0 - result = self._try_coerce_result(result) return make_block(result, placement=np.arange(len(result)), ndim=ndim) def _replace_coerce( @@ -1710,7 +1697,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) mask = _safe_reshape(mask, new_values.shape) new_values[mask] = new - new_values = self._try_coerce_result(new_values) return [self.make_block(values=new_values)] def _try_cast_result(self, result, dtype=None): @@ -2293,13 +2279,6 @@ def _try_coerce_args(self, other): return other - def _try_coerce_result(self, result): - """ reverse of try_coerce_args """ - if isinstance(result, np.ndarray) and result.dtype.kind == "i": - # needed for _interpolate_with_ffill - result = result.view("M8[ns]") - return result - def to_native_types( self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs ): @@ -2564,6 +2543,19 @@ def equals(self, other): return False return (self.values.view("i8") == other.values.view("i8")).all() + def quantile(self, qs, interpolation="linear", axis=0): + naive = self.values.view("M8[ns]") + + # kludge for 2D block with 1D values + naive = naive.reshape(self.shape) + + blk = self.make_block(naive) + res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis) + + # ravel is kludge for 2D block with 1D values, assumes column-like + aware = self._holder(res_blk.values.ravel(), dtype=self.dtype) + return self.make_block_same_class(aware, ndim=res_blk.ndim) + class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): __slots__ = () @@ -2639,10 +2631,6 @@ def _try_coerce_args(self, other): return other - def _try_coerce_result(self, result): - """ reverse of try_coerce_args / try_operate """ - return result - def should_store(self, value): return issubclass( value.dtype.type, np.timedelta64 diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 344d41ed26943..f8511e6445ea5 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -908,7 +908,7 @@ def fast_xs(self, loc): # Such assignment may incorrectly coerce NaT to None # result[blk.mgr_locs] = blk._slice((slice(None), loc)) for i, rl in enumerate(blk.mgr_locs): - result[rl] = blk._try_coerce_result(blk.iget((i, loc))) + result[rl] = blk.iget((i, loc)) if is_extension_array_dtype(dtype): result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 8f0abc91f7aef..9217612b01b9c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -463,6 +463,15 @@ def interpolate_2d( Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. """ + if is_datetime64tz_dtype(values): + naive = values.view("M8[ns]") + result = interpolate_2d( + naive, method=method, axis=axis, limit=limit, + fill_value=fill_value, dtype=dtype + ) + return type(values)._from_sequence(result, dtype=values.dtype) + + orig_values = values transf = (lambda x: x) if axis == 0 else (lambda x: x.T) @@ -470,7 +479,7 @@ def interpolate_2d( ndim = values.ndim if values.ndim == 1: if axis != 0: # pragma: no cover - raise AssertionError("cannot interpolate on a ndim == 1 with " "axis != 0") + raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") values = values.reshape(tuple((1,) + values.shape)) if fill_value is None: @@ -490,6 +499,10 @@ def interpolate_2d( if ndim == 1: values = values[0] + if orig_values.dtype.kind == 'M': + # convert float back to datetime64 + values = values.astype(orig_values.dtype) + return values From 519ef0d9eb9d6c0ab92c151d6b62ae361aa8fc14 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 27 Jul 2019 13:23:09 -0700 Subject: [PATCH 2/8] remove no-lomnger-necessary --- pandas/core/internals/blocks.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 09729de9501b0..ed4267793a2a1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2456,15 +2456,7 @@ def _try_coerce_result(self, result): result = self._holder._from_sequence( result.astype(np.int64), freq=None, dtype=self.values.dtype ) - elif result.dtype == "M8[ns]": - # otherwise we get here via quantile and already have M8[ns] - result = self._holder._simple_new( - result, freq=None, dtype=self.values.dtype - ) - elif isinstance(result, np.datetime64): - # also for post-quantile - result = self._box_func(result) return result @property From 0b00695405e113fbbced898fd8a54f52406ae764 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 27 Jul 2019 13:48:02 -0700 Subject: [PATCH 3/8] blackify --- pandas/core/missing.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 9217612b01b9c..6318bfcb83dd5 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -466,8 +466,12 @@ def interpolate_2d( if is_datetime64tz_dtype(values): naive = values.view("M8[ns]") result = interpolate_2d( - naive, method=method, axis=axis, limit=limit, - fill_value=fill_value, dtype=dtype + naive, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=dtype, ) return type(values)._from_sequence(result, dtype=values.dtype) @@ -499,7 +503,7 @@ def interpolate_2d( if ndim == 1: values = values[0] - if orig_values.dtype.kind == 'M': + if orig_values.dtype.kind == "M": # convert float back to datetime64 values = values.astype(orig_values.dtype) From d531e96515041df523daf813fc36fd7757ba3115 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 27 Jul 2019 15:16:43 -0700 Subject: [PATCH 4/8] remove _try_coerce_result --- pandas/core/groupby/ops.py | 11 ++++++++ pandas/core/internals/blocks.py | 48 +-------------------------------- 2 files changed, 12 insertions(+), 47 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index cc8aec4cc243b..bd947180c9d3d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -25,6 +25,7 @@ is_categorical_dtype, is_complex_dtype, is_datetime64_any_dtype, + is_datetime64tz_dtype, is_integer_dtype, is_numeric_dtype, is_sparse, @@ -452,6 +453,16 @@ def wrapper(*args, **kwargs): def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): assert kind in ["transform", "aggregate"] + if is_datetime64tz_dtype(values): + # TODO: possible need to reshape? kludge can be avoided when + # 2D EA is allowed. + naive = values.view("M8[ns]") + result, names = self._cython_operation( + kind, naive, how=how, axis=axis, min_count=min_count, **kwargs + ) + result = type(values)(result.astype(np.int64), dtype=values.dtype) + return result, names + # can we do this operation with our cython functions # if not raise NotImplementedError diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ed4267793a2a1..b498d8df53144 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -417,9 +417,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): if self._can_hold_element(value): # equivalent: self._try_coerce_args(value) would not raise blocks = self.putmask(mask, value, inplace=inplace) - blocks = [ - b.make_block(values=self._try_coerce_result(b.values)) for b in blocks - ] return self._maybe_downcast(blocks, downcast) # we can't process the value, but nothing to do @@ -737,12 +734,7 @@ def _try_coerce_args(self, other): return other - def _try_coerce_result(self, result): - """ reverse of try_coerce_args """ - return result - def _try_coerce_and_cast_result(self, result, dtype=None): - result = self._try_coerce_result(result) result = self._try_cast_result(result, dtype=dtype) return result @@ -1409,7 +1401,7 @@ def func(cond, values, other): try: fastres = expressions.where(cond, values, other) - return self._try_coerce_result(fastres) + return fastres except Exception as detail: if errors == "raise": raise TypeError( @@ -1874,20 +1866,6 @@ def _slice(self, slicer): return self.values[slicer] - def _try_cast_result(self, result, dtype=None): - """ - if we have an operation that operates on for example floats - we want to try to cast back to our EA here if possible - - result could be a 2-D numpy array, e.g. the result of - a numeric operation; but it must be shape (1, X) because - we by-definition operate on the ExtensionBlocks one-by-one - - result could also be an EA Array itself, in which case it - is already a 1-D array - """ - return result - def formatting_values(self): # Deprecating the ability to override _formatting_values. # Do the warning here, it's only user in pandas, since we @@ -2445,20 +2423,6 @@ def _try_coerce_args(self, other): return other - def _try_coerce_result(self, result): - """ reverse of try_coerce_args """ - if isinstance(result, np.ndarray): - if result.ndim == 2: - # kludge for 2D blocks with 1D EAs - result = result[0, :] - if result.dtype == np.float64: - # needed for post-groupby.median - result = self._holder._from_sequence( - result.astype(np.int64), freq=None, dtype=self.values.dtype - ) - - return result - @property def _box_func(self): return lambda x: tslibs.Timestamp(x, tz=self.dtype.tz) @@ -3031,16 +2995,6 @@ def array_dtype(self): """ return np.object_ - def _try_coerce_result(self, result): - """ reverse of try_coerce_args """ - - # GH12564: CategoricalBlock is 1-dim only - # while returned results could be any dim - if (not is_categorical_dtype(result)) and isinstance(result, np.ndarray): - result = _block_shape(result, ndim=self.ndim) - - return result - def to_dense(self): # Categorical.get_values returns a DatetimeIndex for datetime # categories, so we can't simply use `np.asarray(self.values)` like From 4318ff658331603fefe07a690e838968c1fca48f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 27 Jul 2019 15:40:15 -0700 Subject: [PATCH 5/8] remove unnecessary --- pandas/core/internals/blocks.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b498d8df53144..4b00f2dc41421 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2423,10 +2423,6 @@ def _try_coerce_args(self, other): return other - @property - def _box_func(self): - return lambda x: tslibs.Timestamp(x, tz=self.dtype.tz) - def diff(self, n, axis=0): """1st discrete difference From 2504066d43ade05b5e4df23eb3b37ca0e149ba1a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 31 Jul 2019 06:51:33 -0700 Subject: [PATCH 6/8] revert --- pandas/core/missing.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f54fa94c75606..bc81fbb7e1ce0 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -463,18 +463,6 @@ def interpolate_2d( Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. """ - if is_datetime64tz_dtype(values): - naive = values.view("M8[ns]") - result = interpolate_2d( - naive, - method=method, - axis=axis, - limit=limit, - fill_value=fill_value, - dtype=dtype, - ) - return type(values)._from_sequence(result, dtype=values.dtype) - orig_values = values transf = (lambda x: x) if axis == 0 else (lambda x: x.T) From d1eab54d845257d0b98930e7691dbd9319476df2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 07:42:18 -0700 Subject: [PATCH 7/8] do casting in two places instead of one --- pandas/core/groupby/ops.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 27ece787681f0..cbfb70ab35df3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -452,16 +452,7 @@ def wrapper(*args, **kwargs): def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): assert kind in ["transform", "aggregate"] - - if is_datetime64tz_dtype(values): - # TODO: possible need to reshape? kludge can be avoided when - # 2D EA is allowed. - naive = values.view("M8[ns]") - result, names = self._cython_operation( - kind, naive, how=how, axis=axis, min_count=min_count, **kwargs - ) - result = type(values)(result.astype(np.int64), dtype=values.dtype) - return result, names + orig_values = values # can we do this operation with our cython functions # if not raise NotImplementedError @@ -486,6 +477,12 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): "timedelta64 type does not support {} operations".format(how) ) + if is_datetime64tz_dtype(values.dtype): + # Cast to naive; we'll cast back at the end of the function + # TODO: possible need to reshape? kludge can be avoided when + # 2D EA is allowed. + values = values.view("M8[ns]") + arity = self._cython_arity.get(how, 1) vdim = values.ndim @@ -592,6 +589,9 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): if swapped: result = result.swapaxes(0, axis) + if is_datetime64tz_dtype(orig_values.dtype): + result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype) + return result, names def aggregate(self, values, how, axis=0, min_count=-1): From e05309718ec009d7b0cd79c6b75dac751e143924 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 16:51:51 -0700 Subject: [PATCH 8/8] requested rearrangement --- pandas/core/groupby/ops.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index cbfb70ab35df3..b066629676e5d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -483,24 +483,6 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): # 2D EA is allowed. values = values.view("M8[ns]") - arity = self._cython_arity.get(how, 1) - - vdim = values.ndim - swapped = False - if vdim == 1: - values = values[:, None] - out_shape = (self.ngroups, arity) - else: - if axis > 0: - swapped = True - assert axis == 1, axis - values = values.T - if arity > 1: - raise NotImplementedError( - "arity of more than 1 is not supported for the 'how' argument" - ) - out_shape = (self.ngroups,) + values.shape[1:] - is_datetimelike = needs_i8_conversion(values.dtype) is_numeric = is_numeric_dtype(values.dtype) @@ -521,6 +503,24 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): else: values = values.astype(object) + arity = self._cython_arity.get(how, 1) + + vdim = values.ndim + swapped = False + if vdim == 1: + values = values[:, None] + out_shape = (self.ngroups, arity) + else: + if axis > 0: + swapped = True + assert axis == 1, axis + values = values.T + if arity > 1: + raise NotImplementedError( + "arity of more than 1 is not supported for the 'how' argument" + ) + out_shape = (self.ngroups,) + values.shape[1:] + try: func = self._get_cython_function(kind, how, values, is_numeric) except NotImplementedError: