Skip to content

Commit 80a3ee4

Browse files
committed
Merge pull request #7560 from jreback/groupby_first_perf
PERF: vbench for mixed groupby with datetime (GH7555)
2 parents 647f771 + bc5599a commit 80a3ee4

File tree

4 files changed

+17
-7
lines changed

4 files changed

+17
-7
lines changed

doc/source/v0.14.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ Performance
144144
- Improvements in dtype inference for numeric operations, yielding performance gains for dtypes: ``int64``, ``timedelta64``, ``datetime64`` (:issue:`7223`)
145145
- Improvements in Series.transform for significant performance gains (:issue:`6496`)
146146
- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`)
147-
147+
- Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`)
148148

149149

150150

pandas/core/groupby.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -2332,17 +2332,16 @@ def _cython_agg_blocks(self, how, numeric_only=True):
23322332
data = data.get_numeric_data(copy=False)
23332333

23342334
for block in data.blocks:
2335-
values = block.values
23362335

2337-
is_numeric = is_numeric_dtype(values.dtype)
2336+
values = block._try_operate(block.values)
23382337

2339-
if is_numeric:
2338+
if block.is_numeric:
23402339
values = com.ensure_float(values)
23412340

23422341
result, _ = self.grouper.aggregate(values, how, axis=agg_axis)
23432342

23442343
# see if we can cast the block back to the original dtype
2345-
result = block._try_cast_result(result)
2344+
result = block._try_coerce_and_cast_result(result)
23462345

23472346
newb = make_block(result, placement=block.mgr_locs)
23482347
new_blocks.append(newb)

pandas/core/internals.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,11 @@ def _try_coerce_result(self, result):
412412
""" reverse of try_coerce_args """
413413
return result
414414

415+
def _try_coerce_and_cast_result(self, result, dtype=None):
416+
result = self._try_coerce_result(result)
417+
result = self._try_cast_result(result, dtype=dtype)
418+
return result
419+
415420
def _try_fill(self, value):
416421
return value
417422

@@ -513,8 +518,7 @@ def setitem(self, indexer, value):
513518
dtype, _ = _infer_dtype_from_scalar(value)
514519
else:
515520
dtype = 'infer'
516-
values = self._try_coerce_result(values)
517-
values = self._try_cast_result(values, dtype)
521+
values = self._try_coerce_and_cast_result(values, dtype)
518522
return [make_block(transf(values),
519523
ndim=self.ndim, placement=self.mgr_locs,
520524
fastpath=True)]

vb_suite/groupby.py

+7
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,13 @@ def f():
244244
groupby_last_float32 = Benchmark('data2.groupby(labels).last()', setup,
245245
start_date=datetime(2013, 1, 1))
246246

247+
# with datetimes (GH7555)
248+
setup = common_setup + """
249+
df = DataFrame({'a' : date_range('1/1/2011',periods=100000,freq='s'),'b' : range(100000)})
250+
"""
251+
252+
groupby_mixed_first = Benchmark('df.groupby("b").first()', setup,
253+
start_date=datetime(2013, 5, 1))
247254

248255
#----------------------------------------------------------------------
249256
# groupby_indices replacement, chop up Series

0 commit comments

Comments
 (0)