Skip to content

Commit 950de20

Browse files
committed
fixups
1 parent b868cbc commit 950de20

File tree

3 files changed

+30
-26
lines changed

3 files changed

+30
-26
lines changed

pandas/core/groupby.py

+22-13
Original file line number | Diff line number | Diff line change
@@ -256,11 +256,13 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
256256
def ax(self):
257257
return self.grouper
258258

259-
def _get_grouper(self, obj):
259+
def _get_grouper(self, obj, validate=True):
260260
"""
261261
Parameters
262262
----------
263263
obj : the subject object
264+
validate : boolean, default True
265+
if True, validate the grouper
264266
265267
Returns
266268
-------
@@ -271,7 +273,8 @@ def _get_grouper(self, obj):
271273
self.grouper, exclusions, self.obj = _get_grouper(self.obj, [self.key],
272274
axis=self.axis,
273275
level=self.level,
274-
sort=self.sort)
276+
sort=self.sort,
277+
validate=validate)
275278
return self.binner, self.grouper, self.obj
276279

277280
def _set_grouper(self, obj, sort=False):
@@ -1739,8 +1742,9 @@ class BaseGrouper(object):
17391742
whether this grouper will give sorted result or not
17401743
group_keys : boolean, default True
17411744
mutated : boolean, default False
1742-
indexer : the indexer created by Grouper
1743-
some grouper (TimeGrouper eg) will sort its axis and its
1745+
indexer : intp array, optional
1746+
the indexer created by Grouper
1747+
some groupers (TimeGrouper) will sort its axis and its
17441748
group_info is also sorted, so need the indexer to reorder
17451749
17461750
"""
@@ -2514,8 +2518,11 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
25142518
# a passed Grouper like, directly get the grouper in the same way
25152519
# as single grouper groupby, use the group_info to get labels
25162520
elif isinstance(self.grouper, Grouper):
2517-
# get the new grouper
2518-
_, grouper, _ = self.grouper._get_grouper(self.obj)
2521+
# get the new grouper; we already have disambiguated
2522+
# what key/level refer to exactly, don't need to
2523+
# check again as we have by this point converted these
2524+
# to an actual value (rather than a pd.Grouper)
2525+
_, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
25192526
if self.name is None:
25202527
self.name = grouper.result_index.name
25212528
self.obj = self.grouper.obj
@@ -2587,12 +2594,12 @@ def ngroups(self):
25872594

25882595
@cache_readonly
25892596
def indices(self):
2590-
# for the situation of groupby list of groupers
2597+
# we have a list of groupers
25912598
if isinstance(self.grouper, BaseGrouper):
25922599
return self.grouper.indices
2593-
else:
2594-
values = _ensure_categorical(self.grouper)
2595-
return values._reverse_indexer()
2600+
2601+
values = _ensure_categorical(self.grouper)
2602+
return values._reverse_indexer()
25962603

25972604
@property
25982605
def labels(self):
@@ -2608,7 +2615,7 @@ def group_index(self):
26082615

26092616
def _make_labels(self):
26102617
if self._labels is None or self._group_index is None:
2611-
# for the situation of groupby list of groupers
2618+
# we have a list of groupers
26122619
if isinstance(self.grouper, BaseGrouper):
26132620
labels = self.grouper.label_info
26142621
uniques = self.grouper.result_index
@@ -2626,7 +2633,7 @@ def groups(self):
26262633

26272634

26282635
def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
2629-
mutated=False):
2636+
mutated=False, validate=True):
26302637
"""
26312638
create and return a BaseGrouper, which is an internal
26322639
mapping of how to create the grouper indexers.
@@ -2643,6 +2650,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
26432650
are and then creates a Grouping for each one, combined into
26442651
a BaseGrouper.
26452652
2653+
If validate, then check for key/level overlaps
2654+
26462655
"""
26472656
group_axis = obj._get_axis(axis)
26482657

@@ -2767,7 +2776,7 @@ def is_in_obj(gpr):
27672776

27682777
elif is_in_axis(gpr): # df.groupby('name')
27692778
if gpr in obj:
2770-
if gpr in obj.index.names:
2779+
if validate and gpr in obj.index.names:
27712780
warnings.warn(
27722781
("'%s' is both a column name and an index level.\n"
27732782
"Defaulting to column but "

pandas/core/resample.py

+2-9
Original file line number | Diff line number | Diff line change
@@ -879,14 +879,7 @@ def _downsample(self, how, **kwargs):
879879

880880
if is_subperiod(ax.freq, self.freq):
881881
# Downsampling
882-
if len(new_index) == 0:
883-
bins = []
884-
else:
885-
i8 = memb.asi8
886-
rng = np.arange(i8[0], i8[-1] + 1)
887-
bins = memb.searchsorted(rng, side='right')
888-
grouper = BinGrouper(bins, new_index, indexer=self.groupby.indexer)
889-
return self._groupby_and_aggregate(how, grouper=grouper)
882+
return self._groupby_and_aggregate(how, grouper=self.grouper)
890883
elif is_superperiod(ax.freq, self.freq):
891884
if how == 'ohlc':
892885
# GH #13083
@@ -1112,7 +1105,7 @@ def _get_resampler(self, obj, kind=None):
11121105
"TimedeltaIndex or PeriodIndex, "
11131106
"but got an instance of %r" % type(ax).__name__)
11141107

1115-
def _get_grouper(self, obj):
1108+
def _get_grouper(self, obj, validate=True):
11161109
# create the resampler and return our binner
11171110
r = self._get_resampler(obj)
11181111
r._set_binner()

pandas/tests/groupby/test_timegrouper.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -634,9 +634,11 @@ def test_scalar_call_versus_list_call(self):
634634
'value': [1, 2, 3]
635635
}
636636
data_frame = pd.DataFrame(data_frame).set_index('time')
637-
grouper = pd.TimeGrouper('D')
637+
grouper = pd.Grouper(freq='D')
638+
638639
grouped = data_frame.groupby(grouper)
639-
data1 = grouped.count()
640+
result = grouped.count()
640641
grouped = data_frame.groupby([grouper])
641-
data2 = grouped.count()
642-
assert_frame_equal(data1, data2)
642+
expected = grouped.count()
643+
644+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)