diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 1484feeeada64..b066629676e5d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -25,6 +25,7 @@ is_categorical_dtype, is_complex_dtype, is_datetime64_any_dtype, + is_datetime64tz_dtype, is_integer_dtype, is_numeric_dtype, is_sparse, @@ -451,6 +452,7 @@ def wrapper(*args, **kwargs): def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): assert kind in ["transform", "aggregate"] + orig_values = values # can we do this operation with our cython functions # if not raise NotImplementedError @@ -475,23 +477,11 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): "timedelta64 type does not support {} operations".format(how) ) - arity = self._cython_arity.get(how, 1) - - vdim = values.ndim - swapped = False - if vdim == 1: - values = values[:, None] - out_shape = (self.ngroups, arity) - else: - if axis > 0: - swapped = True - assert axis == 1, axis - values = values.T - if arity > 1: - raise NotImplementedError( - "arity of more than 1 is not supported for the 'how' argument" - ) - out_shape = (self.ngroups,) + values.shape[1:] + if is_datetime64tz_dtype(values.dtype): + # Cast to naive; we'll cast back at the end of the function + # TODO: possible need to reshape? kludge can be avoided when + # 2D EA is allowed. + values = values.view("M8[ns]") is_datetimelike = needs_i8_conversion(values.dtype) is_numeric = is_numeric_dtype(values.dtype) @@ -513,6 +503,24 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): else: values = values.astype(object) + arity = self._cython_arity.get(how, 1) + + vdim = values.ndim + swapped = False + if vdim == 1: + values = values[:, None] + out_shape = (self.ngroups, arity) + else: + if axis > 0: + swapped = True + assert axis == 1, axis + values = values.T + if arity > 1: + raise NotImplementedError( + "arity of more than 1 is not supported for the 'how' argument" + ) + out_shape = (self.ngroups,) + values.shape[1:] + try: func = self._get_cython_function(kind, how, values, is_numeric) except NotImplementedError: @@ -581,6 +589,9 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): if swapped: result = result.swapaxes(0, axis) + if is_datetime64tz_dtype(orig_values.dtype): + result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype) + return result, names def aggregate(self, values, how, axis=0, min_count=-1): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8ade489e71587..6a2aebe5db246 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -417,9 +417,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): if self._can_hold_element(value): # equivalent: self._try_coerce_args(value) would not raise blocks = self.putmask(mask, value, inplace=inplace) - blocks = [ - b.make_block(values=self._try_coerce_result(b.values)) for b in blocks - ] return self._maybe_downcast(blocks, downcast) # we can't process the value, but nothing to do @@ -734,12 +731,7 @@ def _try_coerce_args(self, other): return other - def _try_coerce_result(self, result): - """ reverse of try_coerce_args """ - return result - def _try_coerce_and_cast_result(self, result, dtype=None): - result = self._try_coerce_result(result) result = self._try_cast_result(result, dtype=dtype) return result @@ -1406,7 +1398,7 @@ def func(cond, values, other): try: fastres = expressions.where(cond, values, other) - return self._try_coerce_result(fastres) + return fastres except Exception as detail: if errors == "raise": raise TypeError( @@ -1692,7 +1684,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) mask = _safe_reshape(mask, new_values.shape) new_values[mask] = new - new_values = self._try_coerce_result(new_values) return [self.make_block(values=new_values)] def _try_cast_result(self, result, dtype=None): @@ -1870,20 +1861,6 @@ def _slice(self, slicer): return self.values[slicer] - def _try_cast_result(self, result, dtype=None): - """ - if we have an operation that operates on for example floats - we want to try to cast back to our EA here if possible - - result could be a 2-D numpy array, e.g. the result of - a numeric operation; but it must be shape (1, X) because - we by-definition operate on the ExtensionBlocks one-by-one - - result could also be an EA Array itself, in which case it - is already a 1-D array - """ - return result - def formatting_values(self): # Deprecating the ability to override _formatting_values. # Do the warning here, it's only user in pandas, since we @@ -2443,20 +2420,6 @@ def _try_coerce_args(self, other): return other - def _try_coerce_result(self, result): - """ reverse of try_coerce_args """ - if isinstance(result, np.ndarray): - if result.ndim == 2: - # kludge for 2D blocks with 1D EAs - result = result[0, :] - if result.dtype == np.float64: - # needed for post-groupby.median - result = self._holder._from_sequence( - result.astype(np.int64), freq=None, dtype=self.values.dtype - ) - - return result - def diff(self, n, axis=0): """1st discrete difference @@ -2619,10 +2582,6 @@ def _try_coerce_args(self, other): return other - def _try_coerce_result(self, result): - """ reverse of try_coerce_args / try_operate """ - return result - def should_store(self, value): return issubclass( value.dtype.type, np.timedelta64 @@ -3031,16 +2990,6 @@ def array_dtype(self): """ return np.object_ - def _try_coerce_result(self, result): - """ reverse of try_coerce_args """ - - # GH12564: CategoricalBlock is 1-dim only - # while returned results could be any dim - if (not is_categorical_dtype(result)) and isinstance(result, np.ndarray): - result = _block_shape(result, ndim=self.ndim) - - return result - def to_dense(self): # Categorical.get_values returns a DatetimeIndex for datetime # categories, so we can't simply use `np.asarray(self.values)` like diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8956821740bf3..c7318314b8af9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -908,7 +908,7 @@ def fast_xs(self, loc): # Such assignment may incorrectly coerce NaT to None # result[blk.mgr_locs] = blk._slice((slice(None), loc)) for i, rl in enumerate(blk.mgr_locs): - result[rl] = blk._try_coerce_result(blk.iget((i, loc))) + result[rl] = blk.iget((i, loc)) if is_extension_array_dtype(dtype): result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)