Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
In [23]: import pandas as pd; import numpy as np
In [24]: df = pd.DataFrame([[1, 2, 3],
...: [4, 5, 6],
...: [7, 8, 9],
...: [np.nan, np.nan, np.nan]],
...: columns=['A', 'B', 'C'],
...: index=['a', 'b', 'c', 'd'])
In [25]: df.agg(x=('a', 'max'), y=('b', 'min'), axis=1)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'a'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[25], line 1
----> 1 df.agg(x=('a', 'max'), y=('b', 'min'), axis=1)
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/frame.py:10137, in DataFrame.aggregate(self, func, axis, *args, **kwargs)
10135 op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
10136 result = op.agg()
> 10137 result = reconstruct_and_relabel_result(result, func, **kwargs)
10138 return result
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:1913, in reconstruct_and_relabel_result(result, func, **kwargs)
1910 assert columns is not None
1911 assert order is not None
-> 1913 result_in_dict = relabel_result(result, func, columns, order)
1914 result = DataFrame(result_in_dict, index=columns)
1916 return result
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:1862, in relabel_result(result, func, columns, order)
1860 reorder_mask = not isinstance(result, ABCSeries) and len(result.columns) > 1
1861 for col, fun in func.items():
-> 1862 s = result[col].dropna()
1864 # In the `_aggregate`, the callable names are obtained and used in `result`, and
1865 # these names are ordered alphabetically. e.g.
1866 # C2 C1
(...)
1882 # mean 1.5
1883 # mean 1.5
1884 if reorder_mask:
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/frame.py:4090, in DataFrame.__getitem__(self, key)
4088 if self.columns.nlevels > 1:
4089 return self._getitem_multilevel(key)
-> 4090 indexer = self.columns.get_loc(key)
4091 if is_integer(indexer):
4092 indexer = [indexer]
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'a'
In [26]: df.agg(x=('c', 'max'), y=('l', 'min'), axis=1)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[26], line 1
----> 1 df.agg(x=('c', 'max'), y=('l', 'min'), axis=1)
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/frame.py:10136, in DataFrame.aggregate(self, func, axis, *args, **kwargs)
10133 axis = self._get_axis_number(axis)
10135 op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
> 10136 result = op.agg()
10137 result = reconstruct_and_relabel_result(result, func, **kwargs)
10138 return result
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:928, in FrameApply.agg(self)
926 result = None
927 try:
--> 928 result = super().agg()
929 finally:
930 self.obj = obj
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:190, in Apply.agg(self)
187 return self.apply_str()
189 if is_dict_like(func):
--> 190 return self.agg_dict_like()
191 elif is_list_like(func):
192 # we require a list, but not a 'str'
193 return self.agg_list_like()
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:423, in Apply.agg_dict_like(self)
415 def agg_dict_like(self) -> DataFrame | Series:
416 """
417 Compute aggregation in the case of a dict-like argument.
418
(...)
421 Result of aggregation.
422 """
--> 423 return self.agg_or_apply_dict_like(op_name="agg")
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:763, in NDFrameApply.agg_or_apply_dict_like(self, op_name)
760 raise NotImplementedError("axis other than 0 is not supported")
762 selection = None
--> 763 result_index, result_data = self.compute_dict_like(
764 op_name, obj, selection, kwargs
765 )
766 result = self.wrap_results_dict_like(obj, result_index, result_data)
767 return result
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:462, in Apply.compute_dict_like(self, op_name, selected_obj, selection, kwargs)
460 is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
461 func = cast(AggFuncTypeDict, self.func)
--> 462 func = self.normalize_dictlike_arg(op_name, selected_obj, func)
464 is_non_unique_col = (
465 selected_obj.ndim == 2
466 and selected_obj.columns.nunique() < len(selected_obj.columns)
467 )
469 if selected_obj.ndim == 1:
470 # key only used for output
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:663, in Apply.normalize_dictlike_arg(self, how, obj, func)
661 cols = Index(list(func.keys())).difference(obj.columns, sort=True)
662 if len(cols) > 0:
--> 663 raise KeyError(f"Column(s) {list(cols)} do not exist")
665 aggregator_types = (list, tuple, dict)
667 # if we have a dict of any non-scalars
668 # eg. {'A' : ['mean']}, normalize all to
669 # be list-likes
670 # Cannot use func.values() because arg may be a Series
KeyError: "Column(s) ['l'] do not exist"
Issue Description
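It seems that `df.agg` does not support named aggregations when `axis=1`, but the error raised is inconsistent and depends on the contents of the index. When every label in the aggregation tuples exists in the index (`'a'` and `'b'` in the first snippet), a bare `KeyError: 'a'` is raised from `relabel_result`. When a label does not exist in the index (`'l'` in the second snippet), `KeyError: "Column(s) ['l'] do not exist"` is raised earlier, from `normalize_dictlike_arg`.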
Expected Behavior
A consistent error should be raised (e.g. a `ValueError` when `axis=1` and `func` is `None`, i.e. when named aggregations are used), or the first code snippet should work.
Installed Versions
INSTALLED VERSIONS
commit : bdc79c1
python : 3.10.14.final.0
python-bits : 64
OS : Darwin
OS-release : 23.5.0
Version : Darwin Kernel Version 23.5.0: Wed May 1 20:14:38 PDT 2024; root:xnu-10063.121.3~5/RELEASE_ARM64_T6020
machine : arm64
processor : arm
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 2.2.1
numpy : 1.26.4
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 68.2.2
pip : 23.3.1
Cython : None
pytest : 7.4.4
hypothesis : None
sphinx : 5.0.2
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.3
IPython : 8.23.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.3
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : 2024.3.1
gcsfs : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 16.0.0
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : 1.13.0
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2024.1
qtpy : None
pyqt5 : None