You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
IndexError Traceback (most recent call last)
Cell In [2], line 4
1 import pandas as pd
3 df = pd.DataFrame([], index=pd.Index([], name="a"), columns=["b"])
----> 4 df.groupby("a").agg({"b": lambda x: x.iloc[0]})
File PREFIX/lib/python3.10/site-packages/pandas/core/groupby/generic.py:869, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
866 func = maybe_mangle_lambdas(func)
868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
870 if not is_dict_like(func) and result is not None:
871 return result
File PREFIX/lib/python3.10/site-packages/pandas/core/apply.py:168, in Apply.agg(self)
165 return self.apply_str()
167 if is_dict_like(arg):
--> 168 return self.agg_dict_like()
169 elif is_list_like(arg):
170 # we require a list, but not a 'str'
171 return self.agg_list_like()
File PREFIX/lib/python3.10/site-packages/pandas/core/apply.py:481, in Apply.agg_dict_like(self)
478 results = {key: colg.agg(how) for key, how in arg.items()}
479 else:
480 # key used for column selection and output
--> 481 results = {
482 key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
483 }
485 # set the final keys
486 keys = list(arg.keys())
File PREFIX/lib/python3.10/site-packages/pandas/core/apply.py:482, in <dictcomp>(.0)
478 results = {key: colg.agg(how) for key, how in arg.items()}
479 else:
480 # key used for column selection and output
481 results = {
--> 482 key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
483 }
485 # set the final keys
486 keys = list(arg.keys())
File PREFIX/lib/python3.10/site-packages/pandas/core/groupby/generic.py:287, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
284 return self._python_agg_general(func, *args, **kwargs)
286 try:
--> 287 return self._python_agg_general(func, *args, **kwargs)
288 except KeyError:
289 # TODO: KeyError is raised in _python_agg_general,
290 # see test_groupby.test_basic
291 result = self._aggregate_named(func, *args, **kwargs)
File PREFIX/lib/python3.10/site-packages/pandas/core/groupby/groupby.py:1483, in GroupBy._python_agg_general(self, func, *args, **kwargs)
1479 output: dict[base.OutputKey, ArrayLike] = {}
1481 if self.ngroups == 0:
1482 # agg_series below assumes ngroups > 0
-> 1483 return self._python_apply_general(f, self._selected_obj)
1485 for idx, obj in enumerate(self._iterate_slices()):
1486 name = obj.name
File PREFIX/lib/python3.10/site-packages/pandas/core/groupby/groupby.py:1464, in GroupBy._python_apply_general(self, f, data, not_indexed_same)
1438 @final
1439 def _python_apply_general(
1440 self,
(...)
1443 not_indexed_same: bool | None = None,
1444 ) -> DataFrame | Series:
1445 """
1446 Apply function f in python space
1447
(...)
1462 data after applying f
1463 """
-> 1464 values, mutated = self.grouper.apply(f, data, self.axis)
1466 if not_indexed_same is None:
1467 not_indexed_same = mutated or self.mutated
File PREFIX/lib/python3.10/site-packages/pandas/core/groupby/ops.py:776, in BaseGrouper.apply(self, f, data, axis)
766 # getattr pattern for __name__ is needed for functools.partial objects
767 if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
768 "idxmin",
769 "idxmax",
(...)
774 # so we will not have raised even if this is an invalid dtype.
775 # So do one dummy call here to raise appropriate TypeError.
--> 776 f(data.iloc[:0])
778 return result_values, mutated
File PREFIX/lib/python3.10/site-packages/pandas/core/groupby/groupby.py:1476, in GroupBy._python_agg_general.<locals>.<lambda>(x)
1473 @final
1474 def _python_agg_general(self, func, *args, **kwargs):
1475 func = com.is_builtin_func(func)
-> 1476 f = lambda x: func(x, *args, **kwargs)
1478 # iterate through "columns" ex exclusions to populate output dict
1479 output: dict[base.OutputKey, ArrayLike] = {}
Cell In [2], line 4, in <lambda>(x)
1 import pandas as pd
3 df = pd.DataFrame([], index=pd.Index([], name="a"), columns=["b"])
----> 4 df.groupby("a").agg({"b": lambda x: x.iloc[0]})
File PREFIX/lib/python3.10/site-packages/pandas/core/indexing.py:967, in _LocationIndexer.__getitem__(self, key)
964 axis = self.axis or 0
966 maybe_callable = com.apply_if_callable(key, self.obj)
--> 967 return self._getitem_axis(maybe_callable, axis=axis)
File PREFIX/lib/python3.10/site-packages/pandas/core/indexing.py:1523, in _iLocIndexer._getitem_axis(self, key, axis)
1520 raise TypeError("Cannot index by location index with a non-integer key")
1522 # validate the location
-> 1523 self._validate_integer(key, axis)
1525 return self.obj._ixs(key, axis=axis)
File PREFIX/lib/python3.10/site-packages/pandas/core/indexing.py:1455, in _iLocIndexer._validate_integer(self, key, axis)
1453 len_axis = len(self.obj._get_axis(axis))
1454 if key >= len_axis or key < -len_axis:
-> 1455 raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
Issue Description
Aggregating on the index of an empty dataframe still executes the aggregation function once on an empty dataframe.
Pandas version checks
I have checked that this issue has not already been reported.
I have confirmed this bug exists on the latest version of pandas.
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
Issue Description
Aggregating on the index of an empty dataframe still executes the aggregation function once on an empty dataframe.
Expected Behavior
This is a regression. In pandas==1.3.5:
Installed Versions
INSTALLED VERSIONS
commit : ca60aab
python : 3.10.6.final.0
python-bits : 64
OS : Linux
OS-release : 4.18.0-348.20.1.el8_5.x86_64
Version : #1 SMP Tue Mar 8 12:56:54 EST 2022
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : en_US.UTF-8
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.4.4
numpy : 1.23.3
pytz : 2022.2.1
dateutil : 2.8.2
setuptools : 65.3.0
pip : 22.2.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : None
IPython : 8.5.0
pandas_datareader: None
bs4 : None
bottleneck : None
brotli : None
fastparquet : None
fsspec : None
gcsfs : None
markupsafe : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : None
snappy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlwt : None
zstandard : None
The text was updated successfully, but these errors were encountered: