Skip to content

Commit efea76b

Browse files
committed
BUG: reset index mapping after each function call in SeriesGrouper, close #1423
1 parent 20b5218 commit efea76b

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

pandas/src/reduce.pyx

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,7 @@ cdef class SeriesBinGrouper:
150150
object res, chunk
151151
bint initialized = 0
152152
Slider vslider, islider
153+
IndexEngine gin
153154

154155
counts = np.zeros(self.ngroups, dtype=np.int64)
155156

@@ -168,6 +169,8 @@ cdef class SeriesBinGrouper:
168169
vslider = Slider(self.arr, self.dummy)
169170
islider = Slider(self.index, self.dummy.index)
170171

172+
gin = <IndexEngine> self.dummy.index._engine
173+
171174
try:
172175
for i in range(self.ngroups):
173176
group_size = counts[i]
@@ -185,6 +188,8 @@ cdef class SeriesBinGrouper:
185188

186189
islider.advance(group_size)
187190
vslider.advance(group_size)
191+
192+
gin.clear_mapping()
188193
except:
189194
raise
190195
finally:
@@ -253,6 +258,7 @@ cdef class SeriesGrouper:
253258
object res, chunk
254259
bint initialized = 0
255260
Slider vslider, islider
261+
IndexEngine gin
256262

257263
labels = self.labels
258264
counts = np.zeros(self.ngroups, dtype=np.int64)
@@ -263,6 +269,7 @@ cdef class SeriesGrouper:
263269
vslider = Slider(self.arr, self.dummy)
264270
islider = Slider(self.index, self.dummy.index)
265271

272+
gin = <IndexEngine> self.dummy.index._engine
266273
try:
267274
for i in range(n):
268275
group_size += 1
@@ -291,6 +298,9 @@ cdef class SeriesGrouper:
291298
vslider.advance(group_size)
292299

293300
group_size = 0
301+
302+
gin.clear_mapping()
303+
294304
except:
295305
raise
296306
finally:

pandas/tests/test_groupby.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,6 +1933,35 @@ def test_groupby_groups_datetimeindex(self):
19331933
groups = grouped.groups
19341934
self.assert_(isinstance(groups.keys()[0], datetime))
19351935

1936+
def test_groupby_reindex_inside_function(self):
    """Regression test for GH #1423.

    An aggregation function that selects a subset of each group by index
    label must produce the same result whether or not it also performs a
    label lookup on the group chunk before returning.
    """
    from pandas.tseries.api import DatetimeIndex

    periods = 1000
    ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods)
    df = DataFrame({'high': np.arange(periods),
                    'low': np.arange(periods)}, index=ind)

    def agg_before(hour, func, fix=False):
        """
        Build an aggregator applying ``func`` to rows before ``hour``.

        When ``fix`` is true the aggregator additionally looks up the
        first label of the chunk it receives.
        """
        def _func(data):
            # Keep only observations whose timestamp falls before `hour`.
            d = data.select(lambda x: x.hour < hour).dropna()
            if fix:
                # Result intentionally discarded: only the lookup matters.
                # NOTE(review): presumably this forces the chunk's index
                # engine to rebuild its label mapping, which masked the
                # stale-mapping bug -- confirm against GH #1423.
                data[data.index[0]]
            if len(d) == 0:
                return None
            return func(d)
        return _func

    # One group per calendar day.
    grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))
    closure_bad = grouped.agg({'high': agg_before(11, np.max)})
    closure_good = grouped.agg({'high': agg_before(11, np.max, True)})

    assert_frame_equal(closure_bad, closure_good)
19361965

19371966
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
19381967
tups = map(tuple, df[keys].values)

0 commit comments

Comments
 (0)