Skip to content
forked from pydata/xarray

Commit ba159bf

Browse files
committed
Introduce Grouper objects.
1 parent f30da34 commit ba159bf

File tree

7 files changed

+398
-301
lines changed

7 files changed

+398
-301
lines changed

xarray/core/common.py

+7-6
Original file line number | Diff line number | Diff line change
@@ -996,11 +996,16 @@ def _resample(
996996
if base is not None and offset is not None:
997997
raise ValueError("base and offset cannot be present at the same time")
998998

999+
index = self._indexes[dim_name].to_pandas_index()
9991000
if base is not None:
1000-
index = self._indexes[dim_name].to_pandas_index()
10011001
offset = _convert_base_to_offset(base, freq, index)
10021002

1003+
group = DataArray(
1004+
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
1005+
)
1006+
10031007
grouper = TimeResampleGrouper(
1008+
group=group,
10041009
freq=freq,
10051010
closed=closed,
10061011
label=label,
@@ -1009,14 +1014,10 @@ def _resample(
10091014
loffset=loffset,
10101015
)
10111016

1012-
group = DataArray(
1013-
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
1014-
)
10151017
return resample_cls(
10161018
self,
1017-
group=group,
1018-
dim=dim_name,
10191019
grouper=grouper,
1020+
dim=dim_name,
10201021
resample_dim=RESAMPLE_DIM,
10211022
restore_coord_dims=restore_coord_dims,
10221023
)

xarray/core/computation.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -524,15 +524,16 @@ def apply_groupby_func(func, *args):
524524
groupbys = [arg for arg in args if isinstance(arg, GroupBy)]
525525
assert groupbys, "must have at least one groupby to iterate over"
526526
first_groupby = groupbys[0]
527-
if any(not first_groupby._group.equals(gb._group) for gb in groupbys[1:]):
527+
(grouper,) = first_groupby.groupers
528+
if any(not grouper.group.equals(gb.groupers[0].group) for gb in groupbys[1:]):
528529
raise ValueError(
529530
"apply_ufunc can only perform operations over "
530531
"multiple GroupBy objects at once if they are all "
531532
"grouped the same way"
532533
)
533534

534-
grouped_dim = first_groupby._group.name
535-
unique_values = first_groupby._unique_coord.values
535+
grouped_dim = grouper.name
536+
unique_values = grouper.unique_coord.values
536537

537538
iterators = []
538539
for arg in args:

xarray/core/dataarray.py

+24-8
Original file line number | Diff line number | Diff line change
@@ -6256,7 +6256,7 @@ def groupby(
62566256
core.groupby.DataArrayGroupBy
62576257
pandas.DataFrame.groupby
62586258
"""
6259-
from xarray.core.groupby import DataArrayGroupBy
6259+
from xarray.core.groupby import DataArrayGroupBy, UniqueGrouper
62606260

62616261
# While we don't generally check the type of every arg, passing
62626262
# multiple dimensions as multiple arguments is common enough, and the
@@ -6269,8 +6269,9 @@ def groupby(
62696269
f"`squeeze` must be True or False, but {squeeze} was supplied"
62706270
)
62716271

6272+
grouper = UniqueGrouper(group)
62726273
return DataArrayGroupBy(
6273-
self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims
6274+
self, grouper, squeeze=squeeze, restore_coord_dims=restore_coord_dims
62746275
)
62756276

62766277
def groupby_bins(
@@ -6341,14 +6342,22 @@ def groupby_bins(
63416342
----------
63426343
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
63436344
"""
6344-
from xarray.core.groupby import DataArrayGroupBy
6345+
from xarray.core.groupby import BinGrouper, DataArrayGroupBy
63456346

6346-
return DataArrayGroupBy(
6347-
self,
6348-
group,
6349-
squeeze=squeeze,
6347+
# While we don't generally check the type of every arg, passing
6348+
# multiple dimensions as multiple arguments is common enough, and the
6349+
# consequences hidden enough (strings evaluate as true) to warrant
6350+
# checking here.
6351+
# A future version could make squeeze kwarg only, but would face
6352+
# backward-compat issues.
6353+
if not isinstance(squeeze, bool):
6354+
raise TypeError(
6355+
f"`squeeze` must be True or False, but {squeeze} was supplied"
6356+
)
6357+
6358+
grouper = BinGrouper(
6359+
group=group,
63506360
bins=bins,
6351-
restore_coord_dims=restore_coord_dims,
63526361
cut_kwargs={
63536362
"right": right,
63546363
"labels": labels,
@@ -6357,6 +6366,13 @@ def groupby_bins(
63576366
},
63586367
)
63596368

6369+
return DataArrayGroupBy(
6370+
self,
6371+
grouper,
6372+
squeeze=squeeze,
6373+
restore_coord_dims=restore_coord_dims,
6374+
)
6375+
63606376
def weighted(self, weights: DataArray) -> DataArrayWeighted:
63616377
"""
63626378
Weighted DataArray operations.

xarray/core/dataset.py

+11-8
Original file line number | Diff line number | Diff line change
@@ -8942,7 +8942,7 @@ def groupby(
89428942
Dataset.resample
89438943
DataArray.resample
89448944
"""
8945-
from xarray.core.groupby import DatasetGroupBy
8945+
from xarray.core.groupby import DatasetGroupBy, UniqueGrouper
89468946

89478947
# While we don't generally check the type of every arg, passing
89488948
# multiple dimensions as multiple arguments is common enough, and the
@@ -8955,8 +8955,10 @@ def groupby(
89558955
f"`squeeze` must be True or False, but {squeeze} was supplied"
89568956
)
89578957

8958+
grouper = UniqueGrouper(group)
8959+
89588960
return DatasetGroupBy(
8959-
self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims
8961+
self, grouper, squeeze=squeeze, restore_coord_dims=restore_coord_dims
89608962
)
89618963

89628964
def groupby_bins(
@@ -9027,14 +9029,11 @@ def groupby_bins(
90279029
----------
90289030
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
90299031
"""
9030-
from xarray.core.groupby import DatasetGroupBy
9032+
from xarray.core.groupby import BinGrouper, DatasetGroupBy
90319033

9032-
return DatasetGroupBy(
9033-
self,
9034-
group,
9035-
squeeze=squeeze,
9034+
grouper = BinGrouper(
9035+
group=group,
90369036
bins=bins,
9037-
restore_coord_dims=restore_coord_dims,
90389037
cut_kwargs={
90399038
"right": right,
90409039
"labels": labels,
@@ -9043,6 +9042,10 @@ def groupby_bins(
90439042
},
90449043
)
90459044

9045+
return DatasetGroupBy(
9046+
self, grouper, squeeze=squeeze, restore_coord_dims=restore_coord_dims
9047+
)
9048+
90469049
def weighted(self, weights: DataArray) -> DatasetWeighted:
90479050
"""
90489051
Weighted Dataset operations.

0 commit comments

Comments
 (0)