diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 44541033fbc43..30617b76b91eb 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -143,7 +143,7 @@ Performance - Improvements in dtype inference for numeric operations involving yielding performance gains for dtypes: ``int64``, ``timedelta64``, ``datetime64`` (:issue:`7223`) - Improvements in Series.transform for significant performance gains (:issue:`6496`) - Improvements in DataFrame.transform with ufuncs and built-in grouper functions for signifcant performance gains (:issue:`7383`) - +- Fixed regression in groupby aggregation of datetime64 dtypes (:issue:`7555`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 4d3927428cef2..c2debb9bfe1c0 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2332,17 +2332,16 @@ def _cython_agg_blocks(self, how, numeric_only=True): data = data.get_numeric_data(copy=False) for block in data.blocks: - values = block.values - is_numeric = is_numeric_dtype(values.dtype) + values = block._try_operate(block.values) - if is_numeric: + if block.is_numeric: values = com.ensure_float(values) result, _ = self.grouper.aggregate(values, how, axis=agg_axis) # see if we can cast the block back to the original dtype - result = block._try_cast_result(result) + result = block._try_coerce_and_cast_result(result) newb = make_block(result, placement=block.mgr_locs) new_blocks.append(newb) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6b2d6bcfe3c80..8100b98d6e42d 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -412,6 +412,11 @@ def _try_coerce_result(self, result): """ reverse of try_coerce_args """ return result + def _try_coerce_and_cast_result(self, result, dtype=None): + result = self._try_coerce_result(result) + result = self._try_cast_result(result, dtype=dtype) + return result + def _try_fill(self, value): return value @@ -513,8 +518,7 @@ def setitem(self, indexer, value): dtype, _ = 
_infer_dtype_from_scalar(value) else: dtype = 'infer' - values = self._try_coerce_result(values) - values = self._try_cast_result(values, dtype) + values = self._try_coerce_and_cast_result(values, dtype) return [make_block(transf(values), ndim=self.ndim, placement=self.mgr_locs, fastpath=True)] diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index eac313481aca7..2b6c6f55e5776 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -244,6 +244,13 @@ def f(): groupby_last_float32 = Benchmark('data2.groupby(labels).last()', setup, start_date=datetime(2013, 1, 1)) +# with datetimes (GH7555) +setup = common_setup + """ +df = DataFrame({'a' : date_range('1/1/2011',periods=100000,freq='s'),'b' : range(100000)}) +""" + +groupby_mixed_first = Benchmark('df.groupby("b").first()', setup, + start_date=datetime(2013, 5, 1)) #---------------------------------------------------------------------- # groupby_indices replacement, chop up Series