Skip to content

Commit 251826f

Browse files
committed
BUG: GH15429 transform result of timedelta from datetime
The transform() operation needs to return a like-indexed object. To facilitate this, transform starts with a copy of the original series. Then, after the computation for each group, it sets the appropriate elements of the copied series equal to the result. At that point it does a type comparison and discovers that the timedelta is not castable to a datetime. closes #10972 Author: Jeff Reback <[email protected]> Author: Stephen Rauch <[email protected]> Closes #15430 from stephenrauch/group-by-transform-timedelta-from-datetime and squashes the following commits: c3b0dd0 [Jeff Reback] PEP fix 2f48549 [Jeff Reback] fixup slow transforms cc43503 [Stephen Rauch] BUG: GH15429 transform result of timedelta from datetime
1 parent e15de4d commit 251826f

File tree

4 files changed

+57
-18
lines changed

4 files changed

+57
-18
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,7 @@ Bug Fixes
637637

638638

639639
- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`)
640+
- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`)
640641

641642
- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 0.2.0``) (:issue:`9351`)
642643
- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)

pandas/core/groupby.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -2890,32 +2890,32 @@ def transform(self, func, *args, **kwargs):
28902890
lambda: getattr(self, func)(*args, **kwargs))
28912891

28922892
# reg transform
2893-
dtype = self._selected_obj.dtype
2894-
result = self._selected_obj.values.copy()
2895-
2893+
klass = self._selected_obj.__class__
2894+
results = []
28962895
wrapper = lambda x: func(x, *args, **kwargs)
2897-
for i, (name, group) in enumerate(self):
2896+
for name, group in self:
28982897
object.__setattr__(group, 'name', name)
28992898
res = wrapper(group)
29002899

29012900
if hasattr(res, 'values'):
29022901
res = res.values
29032902

2904-
# may need to astype
2905-
try:
2906-
common_type = np.common_type(np.array(res), result)
2907-
if common_type != result.dtype:
2908-
result = result.astype(common_type)
2909-
except:
2910-
pass
2911-
29122903
indexer = self._get_index(name)
2913-
result[indexer] = res
2904+
s = klass(res, indexer)
2905+
results.append(s)
29142906

2915-
result = _possibly_downcast_to_dtype(result, dtype)
2916-
return self._selected_obj.__class__(result,
2917-
index=self._selected_obj.index,
2918-
name=self._selected_obj.name)
2907+
from pandas.tools.concat import concat
2908+
result = concat(results).sort_index()
2909+
2910+
# we will only try to coerce the result type if
2911+
# we have a numeric dtype
2912+
dtype = self._selected_obj.dtype
2913+
if is_numeric_dtype(dtype):
2914+
result = _possibly_downcast_to_dtype(result, dtype)
2915+
2916+
result.name = self._selected_obj.name
2917+
result.index = self._selected_obj.index
2918+
return result
29192919

29202920
def _transform_fast(self, func):
29212921
"""

pandas/tests/groupby/test_filters.py

+1
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ def test_filter_against_workaround(self):
216216
grouper = s.apply(lambda x: np.round(x, -1))
217217
grouped = s.groupby(grouper)
218218
f = lambda x: x.mean() > 10
219+
219220
old_way = s[grouped.transform(f).astype('bool')]
220221
new_way = grouped.filter(f)
221222
assert_series_equal(new_way.sort_values(), old_way.sort_values())

pandas/tests/groupby/test_transform.py

+38-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pandas as pd
55
from pandas.util import testing as tm
6-
from pandas import Series, DataFrame, Timestamp, MultiIndex, concat
6+
from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range
77
from pandas.types.common import _ensure_platform_int
88
from .common import MixIn, assert_fp_equal
99

@@ -190,6 +190,43 @@ def test_transform_bug(self):
190190
expected = Series(np.arange(5, 0, step=-1), name='B')
191191
assert_series_equal(result, expected)
192192

193+
def test_transform_datetime_to_timedelta(self):
194+
# GH 15429
195+
# transforming a datetime to timedelta
196+
df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
197+
expected = pd.Series([
198+
Timestamp('20130101') - Timestamp('20130101')] * 5, name='A')
199+
200+
# this does date math without changing result type in transform
201+
base_time = df['A'][0]
202+
result = df.groupby('A')['A'].transform(
203+
lambda x: x.max() - x.min() + base_time) - base_time
204+
assert_series_equal(result, expected)
205+
206+
# this does date math and causes the transform to return timedelta
207+
result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())
208+
assert_series_equal(result, expected)
209+
210+
def test_transform_datetime_to_numeric(self):
211+
# GH 10972
212+
# convert dt to float
213+
df = DataFrame({
214+
'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
215+
result = df.groupby('a').b.transform(
216+
lambda x: x.dt.dayofweek - x.dt.dayofweek.mean())
217+
218+
expected = Series([-0.5, 0.5], name='b')
219+
assert_series_equal(result, expected)
220+
221+
# convert dt to int
222+
df = DataFrame({
223+
'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
224+
result = df.groupby('a').b.transform(
225+
lambda x: x.dt.dayofweek - x.dt.dayofweek.min())
226+
227+
expected = Series([0, 1], name='b')
228+
assert_series_equal(result, expected)
229+
193230
def test_transform_multiple(self):
194231
grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month])
195232

0 commit comments

Comments
 (0)