Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
In [9]: import pandas as pd
...: import numpy as np
...: missing_df = pd.DataFrame(
...: {
...: "nan": [np.nan, np.nan, np.nan, np.nan],
...: "na": [pd.NA, pd.NA, pd.NA, pd.NA],
...: "nat": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
...: "none": [None, None, None, None],
...: "values": [1, 2, 3, 4],
...: }
...: )
...: missing_df.agg(x=('nan', 'min'), y=('na', 'min'), z=('values', 'sum'))
Issue Description
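The `DataFrame.agg` call above uses named aggregation to take the `min` of the all-missing columns `nan` and `na` (and the `sum` of `values`). It raises the following `IndexError`: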
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/indexing.py:1714, in _iLocIndexer._get_list_axis(self, key, axis)
1713 try:
-> 1714 return self.obj._take_with_is_copy(key, axis=axis)
1715 except IndexError as err:
1716 # re-raise with different error message, e.g. test_getitem_ndarray_3d
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/generic.py:4150, in NDFrame._take_with_is_copy(self, indices, axis)
4141 """
4142 Internal version of the `take` method that sets the `_is_copy`
4143 attribute to keep track of the parent dataframe (using in indexing
(...)
4148 See the docstring of `take` for full explanation of the parameters.
4149 """
-> 4150 result = self.take(indices=indices, axis=axis)
4151 # Maybe set copy if we didn't actually change the index.
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/generic.py:4130, in NDFrame.take(self, indices, axis, **kwargs)
4126 indices = np.arange(
4127 indices.start, indices.stop, indices.step, dtype=np.intp
4128 )
-> 4130 new_data = self._mgr.take(
4131 indices,
4132 axis=self._get_block_manager_axis(axis),
4133 verify=True,
4134 )
4135 return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__(
4136 self, method="take"
4137 )
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/internals/managers.py:891, in BaseBlockManager.take(self, indexer, axis, verify)
890 n = self.shape[axis]
--> 891 indexer = maybe_convert_indices(indexer, n, verify=verify)
893 new_labels = self.axes[axis].take(indexer)
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/indexers/utils.py:282, in maybe_convert_indices(indices, n, verify)
281 if mask.any():
--> 282 raise IndexError("indices are out-of-bounds")
283 return indices
IndexError: indices are out-of-bounds
The above exception was the direct cause of the following exception:
IndexError Traceback (most recent call last)
Cell In[9], line 12
2 import numpy as np
3 missing_df = pd.DataFrame(
4 {
5 "nan": [np.nan, np.nan, np.nan, np.nan],
(...)
10 }
11 )
---> 12 missing_df.agg(x=('nan', 'min'), y=('na', 'min'), z=('values', 'sum'))
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/frame.py:10137, in DataFrame.aggregate(self, func, axis, *args, **kwargs)
10135 op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
10136 result = op.agg()
> 10137 result = reconstruct_and_relabel_result(result, func, **kwargs)
10138 return result
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:1913, in reconstruct_and_relabel_result(result, func, **kwargs)
1910 assert columns is not None
1911 assert order is not None
-> 1913 result_in_dict = relabel_result(result, func, columns, order)
1914 result = DataFrame(result_in_dict, index=columns)
1916 return result
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/apply.py:1889, in relabel_result(result, func, columns, order)
1885 fun = [
1886 com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
1887 ]
1888 col_idx_order = Index(s.index).get_indexer(fun)
-> 1889 s = s.iloc[col_idx_order]
1891 # assign the new user-provided "named aggregation" as index names, and reindex
1892 # it based on the whole user-provided names.
1893 s.index = reordered_indexes[idx : idx + len(fun)]
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/indexing.py:1191, in _LocationIndexer.__getitem__(self, key)
1189 maybe_callable = com.apply_if_callable(key, self.obj)
1190 maybe_callable = self._check_deprecated_callable_usage(key, maybe_callable)
-> 1191 return self._getitem_axis(maybe_callable, axis=axis)
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/indexing.py:1743, in _iLocIndexer._getitem_axis(self, key, axis)
1741 # a list of integers
1742 elif is_list_like_indexer(key):
-> 1743 return self._get_list_axis(key, axis=axis)
1745 # a single integer
1746 else:
1747 key = item_from_zerodim(key)
File ~/.miniconda3/envs/snowpark/lib/python3.10/site-packages/pandas/core/indexing.py:1717, in _iLocIndexer._get_list_axis(self, key, axis)
1714 return self.obj._take_with_is_copy(key, axis=axis)
1715 except IndexError as err:
1716 # re-raise with different error message, e.g. test_getitem_ndarray_3d
-> 1717 raise IndexError("positional indexers are out-of-bounds") from err
IndexError: positional indexers are out-of-bounds
The call should succeed instead of raising this error.
Expected Behavior
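The aggregation should succeed. It should return a DataFrame indexed by the labels `x`, `y`, and `z`, in which the `min` of each all-missing column (`nan`, `na`) is a missing value and the `sum` of `values` is `10`.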
Installed Versions
INSTALLED VERSIONS
commit : bdc79c1
python : 3.10.14.final.0
python-bits : 64
OS : Darwin
OS-release : 23.5.0
Version : Darwin Kernel Version 23.5.0: Wed May 1 20:14:38 PDT 2024; root:xnu-10063.121.3~5/RELEASE_ARM64_T6020
machine : arm64
processor : arm
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 2.2.1
numpy : 1.26.4
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 68.2.2
pip : 23.3.1
Cython : None
pytest : 7.4.4
hypothesis : None
sphinx : 5.0.2
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.3
IPython : 8.23.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.3
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : 2024.3.1
gcsfs : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 16.0.0
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : 1.13.0
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2024.1
qtpy : None
pyqt5 : None