diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 5dd14e243fbb3..64ef6866ac198 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -146,7 +146,7 @@ the columns except the one we specify: .. ipython:: python df2 = df.set_index(["A", "B"]) - grouped = df2.groupby(level=df2.index.names.difference(["B"])) + grouped = df2.groupby(level="A") grouped.sum() The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi index 8321200a84b76..37ddb4ee11b83 100644 --- a/pandas/_libs/index.pyi +++ b/pandas/_libs/index.pyi @@ -69,12 +69,12 @@ class MaskedUInt8Engine(MaskedIndexEngine): ... class MaskedBoolEngine(MaskedUInt8Engine): ... class BaseMultiIndexCodesEngine: - levels: list[np.ndarray] + levels: tuple[np.ndarray] offsets: np.ndarray # ndarray[uint64_t, ndim=1] def __init__( self, - levels: list[np.ndarray], # all entries hashable + levels: tuple[np.ndarray], # all entries hashable labels: list[np.ndarray], # all entries integer-dtyped offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1] ) -> None: ... diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e6dd6a990d285..62ae1c529e763 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5775,7 +5775,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde idx = cast(MultiIndex, idx) levels = list(idx.levels) + [lev] codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))] - mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None]) + mi = MultiIndex(levels=levels, codes=codes, names=list(idx.names) + [None]) else: nidx = len(idx) idx_codes = coerce_indexer_dtype(np.arange(nidx), idx) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cd55997ad5f69..a7f8844627e54 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -169,7 +169,6 @@ disallow_ndim_indexing, is_valid_positional_slice, ) -from pandas.core.indexes.frozen import FrozenList from pandas.core.missing import clean_reindex_fill_method from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op @@ -1725,8 +1724,8 @@ def _get_default_index_names( return names - def _get_names(self) -> FrozenList: - return FrozenList((self.name,)) + def _get_names(self) -> tuple[Hashable, ...]: + return (self.name,) def _set_names(self, values, *, level=None) -> None: """ @@ -1824,7 +1823,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['species', 'year']) + names=('species', 'year')) When renaming levels with a dict, levels can not be passed. @@ -1833,7 +1832,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['snake', 'year']) + names=('snake', 'year')) """ if level is not None and not isinstance(self, ABCMultiIndex): raise ValueError("Level must be None for non-MultiIndex") @@ -1912,13 +1911,13 @@ def rename(self, name, inplace: bool = False): ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['kind', 'year']) + names=('kind', 'year')) >>> idx.rename(['species', 'year']) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['species', 'year']) + names=('species', 'year')) >>> idx.rename('species') Traceback (most recent call last): TypeError: Must pass list-like as `names`. @@ -2077,26 +2076,26 @@ def droplevel(self, level: IndexLabel = 0): Examples -------- >>> mi = pd.MultiIndex.from_arrays( - ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) + ... [[1, 2], [3, 4], [5, 6]], names=('x', 'y', 'z')) >>> mi MultiIndex([(1, 3, 5), (2, 4, 6)], - names=['x', 'y', 'z']) + names=('x', 'y', 'z')) >>> mi.droplevel() MultiIndex([(3, 5), (4, 6)], - names=['y', 'z']) + names=('y', 'z')) >>> mi.droplevel(2) MultiIndex([(1, 3), (2, 4)], - names=['x', 'y']) + names=('x', 'y')) >>> mi.droplevel('z') MultiIndex([(1, 3), (2, 4)], - names=['x', 'y']) + names=('x', 'y')) >>> mi.droplevel(['x', 'y']) Index([5, 6], dtype='int64', name='z') @@ -6487,7 +6486,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: MultiIndex([(1, 'red'), (2, 'blue'), (3, 'green')], - names=['number', 'color']) + names=('number', 'color')) Check whether the strings in the 'color' level of the MultiIndex are in a list of colors. @@ -7460,7 +7459,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) MultiIndex([('a', 'a'), ('a', 'b')], - names=['L1', 'L2']) + names=('L1', 'L2')) See Also -------- diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py deleted file mode 100644 index 9d528d34e3684..0000000000000 --- a/pandas/core/indexes/frozen.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -frozen (immutable) data structures to support MultiIndexing - -These are used for: - -- .names (FrozenList) - -""" -from __future__ import annotations - -from typing import ( - TYPE_CHECKING, - NoReturn, -) - -from pandas.core.base import PandasObject - -from pandas.io.formats.printing import pprint_thing - -if TYPE_CHECKING: - from pandas._typing import Self - - -class FrozenList(PandasObject, list): - """ - Container that doesn't allow setting item *but* - because it's technically hashable, will be used - for lookups, appropriately, etc. - """ - - # Side note: This has to be of type list. Otherwise, - # it messes up PyTables type checks. - - def union(self, other) -> FrozenList: - """ - Returns a FrozenList with other concatenated to the end of self. - - Parameters - ---------- - other : array-like - The array-like whose elements we are concatenating. - - Returns - ------- - FrozenList - The collection difference between self and other. - """ - if isinstance(other, tuple): - other = list(other) - return type(self)(super().__add__(other)) - - def difference(self, other) -> FrozenList: - """ - Returns a FrozenList with elements from other removed from self. - - Parameters - ---------- - other : array-like - The array-like whose elements we are removing self. - - Returns - ------- - FrozenList - The collection difference between self and other. - """ - other = set(other) - temp = [x for x in self if x not in other] - return type(self)(temp) - - # TODO: Consider deprecating these in favor of `union` (xref gh-15506) - # error: Incompatible types in assignment (expression has type - # "Callable[[FrozenList, Any], FrozenList]", base class "list" defined the - # type as overloaded function) - __add__ = __iadd__ = union # type: ignore[assignment] - - def __getitem__(self, n): - if isinstance(n, slice): - return type(self)(super().__getitem__(n)) - return super().__getitem__(n) - - def __radd__(self, other) -> Self: - if isinstance(other, tuple): - other = list(other) - return type(self)(other + list(self)) - - def __eq__(self, other: object) -> bool: - if isinstance(other, (tuple, FrozenList)): - other = list(other) - return super().__eq__(other) - - __req__ = __eq__ - - def __mul__(self, other) -> Self: - return type(self)(super().__mul__(other)) - - __imul__ = __mul__ - - def __reduce__(self): - return type(self), (list(self),) - - # error: Signature of "__hash__" incompatible with supertype "list" - def __hash__(self) -> int: # type: ignore[override] - return hash(tuple(self)) - - def _disabled(self, *args, **kwargs) -> NoReturn: - """ - This method will not function because object is immutable. - """ - raise TypeError(f"'{type(self).__name__}' does not support mutable operations.") - - def __str__(self) -> str: - return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n")) - - def __repr__(self) -> str: - return f"{type(self).__name__}({str(self)})" - - __setitem__ = __setslice__ = _disabled # type: ignore[assignment] - __delitem__ = __delslice__ = _disabled - pop = append = extend = _disabled - remove = sort = insert = _disabled # type: ignore[assignment] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 144045d40a086..2c702ebfa314a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -102,7 +102,6 @@ ensure_index, get_unanimous_names, ) -from pandas.core.indexes.frozen import FrozenList from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( get_group_index, @@ -301,7 +300,7 @@ class MultiIndex(Index): (1, 'blue'), (2, 'red'), (2, 'blue')], - names=['number', 'color']) + names=('number', 'color')) See further examples for how to construct a MultiIndex in the doc strings of the mentioned helper methods. @@ -312,8 +311,8 @@ class MultiIndex(Index): # initialize to zero-length tuples to make everything work _typ = "multiindex" _names: list[Hashable | None] = [] - _levels = FrozenList() - _codes = FrozenList() + _levels = () + _codes = () _comparables = ["names"] sortorder: int | None @@ -394,16 +393,16 @@ def _validate_codes(self, level: list, code: list): def _verify_integrity( self, - codes: list | None = None, - levels: list | None = None, + codes: tuple | None = None, + levels: tuple | None = None, levels_to_verify: list[int] | range | None = None, ): """ Parameters ---------- - codes : optional list + codes : optional tuple Codes to check for validity. Defaults to current codes. - levels : optional list + levels : optional tuple Levels to check for validity. Defaults to current levels. levels_to_validate: optional list Specifies the levels to verify. @@ -463,11 +462,13 @@ def _verify_integrity( result_codes = [] for i in range(len(levels)): if i in levels_to_verify: - result_codes.append(self._validate_codes(levels[i], codes[i])) + # error: Argument 1 to "_validate_codes" of "MultiIndex" + # has incompatible type "Union[Any, Index]"; expected "List[Any]" + result_codes.append(self._validate_codes(levels[i], codes[i])) # type: ignore[arg-type] # noqa: E501 else: result_codes.append(codes[i]) - new_codes = FrozenList(result_codes) + new_codes = tuple(result_codes) return new_codes @classmethod @@ -510,7 +511,7 @@ def from_arrays( (1, 'blue'), (2, 'red'), (2, 'blue')], - names=['number', 'color']) + names=('number', 'color')) """ error_msg = "Input must be a list / sequence of array-likes." if not is_list_like(arrays): @@ -582,7 +583,7 @@ def from_tuples( (1, 'blue'), (2, 'red'), (2, 'blue')], - names=['number', 'color']) + names=('number', 'color')) """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") @@ -666,7 +667,7 @@ def from_product( (1, 'purple'), (2, 'green'), (2, 'purple')], - names=['number', 'color']) + names=('number', 'color')) """ from pandas.core.reshape.util import cartesian_product @@ -734,7 +735,7 @@ def from_frame( ('HI', 'Precip'), ('NJ', 'Temp'), ('NJ', 'Precip')], - names=['a', 'b']) + names=('a', 'b')) Using explicit names, instead of the column names @@ -743,7 +744,7 @@ def from_frame( ('HI', 'Precip'), ('NJ', 'Temp'), ('NJ', 'Precip')], - names=['state', 'observation']) + names=('state', 'observation')) """ if not isinstance(df, ABCDataFrame): raise TypeError("Input must be a DataFrame") @@ -773,9 +774,9 @@ def _values(self) -> np.ndarray: ): vals = vals.astype(object) - vals = np.array(vals, copy=False) - vals = algos.take_nd(vals, codes, fill_value=index._na_value) - values.append(vals) + array_vals = np.array(vals, copy=False) + array_vals = algos.take_nd(array_vals, codes, fill_value=index._na_value) + values.append(array_vals) arr = lib.fast_zip(values) return arr @@ -841,15 +842,17 @@ def size(self) -> int: # Levels Methods @cache_readonly - def levels(self) -> FrozenList: + def levels(self) -> tuple[Index, ...]: # Use cache_readonly to ensure that self.get_locs doesn't repeatedly # create new IndexEngine # https://github.com/pandas-dev/pandas/issues/31648 - result = [x._rename(name=name) for x, name in zip(self._levels, self._names)] + result = tuple( + x._rename(name=name) for x, name in zip(self._levels, self._names) + ) for level in result: # disallow midx.levels[0].name = "foo" level._no_setting_name = True - return FrozenList(result) + return result def _set_levels( self, @@ -872,16 +875,14 @@ def _set_levels( raise ValueError("Length of levels must match length of level.") if level is None: - new_levels = FrozenList( - ensure_index(lev, copy=copy)._view() for lev in levels - ) + new_levels = tuple(ensure_index(lev, copy=copy)._view() for lev in levels) level_numbers = list(range(len(new_levels))) else: level_numbers = [self._get_level_number(lev) for lev in level] - new_levels_list = list(self._levels) + new_levels_list: tuple[Index, ...] = list(self._levels) for lev_num, lev in zip(level_numbers, levels): new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() - new_levels = FrozenList(new_levels_list) + new_levels = tuple(new_levels_list) if verify_integrity: new_codes = self._verify_integrity( @@ -890,7 +891,7 @@ def _set_levels( self._codes = new_codes names = self.names - self._levels = new_levels + self._levels: tuple[Index, ...] = new_levels if any(names): self._set_names(names) @@ -935,7 +936,7 @@ def set_levels( (2, 'two'), (3, 'one'), (3, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) MultiIndex([('a', 1), @@ -944,7 +945,7 @@ def set_levels( ('b', 2), ('c', 1), ('c', 2)], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels(['a', 'b', 'c'], level=0) MultiIndex([('a', 'one'), ('a', 'two'), @@ -952,7 +953,7 @@ def set_levels( ('b', 'two'), ('c', 'one'), ('c', 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels(['a', 'b'], level='bar') MultiIndex([(1, 'a'), (1, 'b'), @@ -960,7 +961,7 @@ def set_levels( (2, 'b'), (3, 'a'), (3, 'b')], - names=['foo', 'bar']) + names=('foo', 'bar')) If any of the levels passed to ``set_levels()`` exceeds the existing length, all of the values from that argument will @@ -974,10 +975,10 @@ def set_levels( ('b', 2), ('c', 1), ('c', 2)], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels - FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) - """ + (Index(['a', 'b', 'c'], dtype='object', name='foo'), Index([1, 2, 3, 4], dtype='int64', name='bar')) + """ # noqa: E501 if isinstance(levels, Index): pass @@ -1030,7 +1031,7 @@ def levshape(self) -> Shape: # Codes Methods @property - def codes(self) -> FrozenList: + def codes(self) -> tuple: return self._codes def _set_codes( @@ -1050,20 +1051,20 @@ def _set_codes( level_numbers: list[int] | range if level is None: - new_codes = FrozenList( + new_codes = tuple( _coerce_indexer_frozen(level_codes, lev, copy=copy).view() for lev, level_codes in zip(self._levels, codes) ) level_numbers = range(len(new_codes)) else: level_numbers = [self._get_level_number(lev) for lev in level] - new_codes_list = list(self._codes) + new_codes_list: list[np.ndarray[np.int8]] = list(self._codes) for lev_num, level_codes in zip(level_numbers, codes): lev = self.levels[lev_num] new_codes_list[lev_num] = _coerce_indexer_frozen( level_codes, lev, copy=copy ) - new_codes = FrozenList(new_codes_list) + new_codes = tuple(new_codes_list) if verify_integrity: new_codes = self._verify_integrity( @@ -1104,32 +1105,32 @@ def set_codes( (1, 'two'), (2, 'one'), (2, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) MultiIndex([(2, 'one'), (1, 'one'), (2, 'two'), (1, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([1, 0, 1, 0], level=0) MultiIndex([(2, 'one'), (1, 'two'), (2, 'one'), (1, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([0, 0, 1, 1], level='bar') MultiIndex([(1, 'one'), (1, 'one'), (2, 'two'), (2, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) MultiIndex([(2, 'one'), (1, 'one'), (2, 'two'), (1, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) """ level, codes = _require_listlike(level, codes, "Codes") @@ -1449,8 +1450,8 @@ def format( # -------------------------------------------------------------------- # Names Methods - def _get_names(self) -> FrozenList: - return FrozenList(self._names) + def _get_names(self) -> tuple[Hashable, ...]: + return tuple(self._names) def _set_names(self, names, *, level=None, validate: bool = True): """ @@ -1523,9 +1524,9 @@ def _set_names(self, names, *, level=None, validate: bool = True): >>> mi MultiIndex([(1, 3, 5), (2, 4, 6)], - names=['x', 'y', 'z']) + names=('x', 'y', 'z')) >>> mi.names - FrozenList(['x', 'y', 'z']) + ('x', 'y', 'z') """, ) @@ -1994,7 +1995,7 @@ def remove_unused_levels(self) -> MultiIndex: >>> mi2 = mi[2:].remove_unused_levels() >>> mi2.levels - FrozenList([[1], ['a', 'b']]) + (Index([1], dtype='int64'), Index(['a', 'b'], dtype='object')) """ new_levels = [] new_codes = [] @@ -2423,17 +2424,17 @@ def reorder_levels(self, order) -> MultiIndex: >>> mi MultiIndex([(1, 3), (2, 4)], - names=['x', 'y']) + names=('x', 'y')) >>> mi.reorder_levels(order=[1, 0]) MultiIndex([(3, 1), (4, 2)], - names=['y', 'x']) + names=('y', 'x')) >>> mi.reorder_levels(order=['y', 'x']) MultiIndex([(3, 1), (4, 2)], - names=['y', 'x']) + names=('y', 'x')) """ order = [self._get_level_number(i) for i in order] result = self._reorder_ilevels(order) @@ -2671,7 +2672,8 @@ def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`. """ lev = self.levels[0] - codes = self._codes[0] + # error: Tuple index out of range + codes = self._codes[0] # type: ignore[misc] cat = Categorical.from_codes(codes=codes, categories=lev, validate=False) ci = Index(cat) return ci.get_indexer_for(target) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 74e6a6a28ccb0..305c10b5a6773 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -132,7 +132,7 @@ def melt( else: mdata[col] = np.tile(id_data._values, K) - mcolumns = id_vars + var_name + [value_name] + mcolumns = id_vars + list(var_name) + [value_name] if frame.shape[1] > 0: mdata[value_name] = concat( diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6d1ff07e07c76..e03c3edba55a8 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -105,7 +105,6 @@ from pandas import DataFrame from pandas.core import groupby from pandas.core.arrays import DatetimeArray - from pandas.core.indexes.frozen import FrozenList _factorizers = { np.int64: libhashtable.Int64Factorizer, @@ -1734,7 +1733,7 @@ def restore_dropped_levels_multijoin( join_index: Index, lindexer: npt.NDArray[np.intp], rindexer: npt.NDArray[np.intp], -) -> tuple[FrozenList, FrozenList, FrozenList]: +) -> tuple[tuple, tuple, tuple]: """ *this is an internal non-public method* @@ -1766,7 +1765,7 @@ def restore_dropped_levels_multijoin( levels of combined multiindexes labels : np.ndarray[np.intp] labels of combined multiindexes - names : List[Hashable] + names : tuple[Hashable] names of combined multiindex levels """ @@ -1783,9 +1782,9 @@ def _convert_to_multiindex(index: Index) -> MultiIndex: # so that dropped levels can be appended join_index = _convert_to_multiindex(join_index) - join_levels = join_index.levels - join_codes = join_index.codes - join_names = join_index.names + join_levels = list(join_index.levels) + join_codes = list(join_index.codes) + join_names = list(join_index.names) # Iterate through the levels that must be restored for dropped_level_name in dropped_level_names: @@ -1813,7 +1812,11 @@ def _convert_to_multiindex(index: Index) -> MultiIndex: join_codes = join_codes + [restore_codes] # type: ignore[has-type] join_names = join_names + [dropped_level_name] - return join_levels, join_codes, join_names + # error: Incompatible return value type + # (got "Tuple[List[Index], List[Any], List[Any]]", + # expected "Tuple[List[Index], ndarray[Any, dtype[signedinteger[Any]]], + # List[Hashable]]") + return join_levels, join_codes, join_names # type: ignore[return-value] class _OrderedMerge(_MergeOperation): @@ -2239,8 +2242,10 @@ def _get_multiindex_indexer( join_keys: list[ArrayLike], index: MultiIndex, sort: bool ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: # left & right join labels and num. of levels at each location + # error: Argument 1 to "_factorize_keys" has incompatible type "Index"; + # expected "Union[ExtensionArray, ndarray[Any, Any]]" mapped = ( - _factorize_keys(index.levels[n]._values, join_keys[n], sort=sort) + _factorize_keys(index.levels[n]._values, join_keys[n], sort=sort) # type: ignore[arg-type] # noqa: E501 for n in range(index.nlevels) ) zipped = zip(*mapped) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 79354fdd12a2d..861e27dbbacd6 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -396,7 +396,7 @@ def _all_key(key): if isinstance(piece.index, MultiIndex): # We are adding an empty level transformed_piece.index = MultiIndex.from_tuples( - [all_key], names=piece.index.names + [None] + [all_key], names=list(piece.index.names) + [None] ) else: transformed_piece.index = Index([all_key], name=piece.index.name) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index bf7c7a1ee4dc7..49c883a1f1610 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -58,7 +58,6 @@ ) from pandas.core.arrays import ExtensionArray - from pandas.core.indexes.frozen import FrozenList class _Unstacker: @@ -334,13 +333,10 @@ def get_new_columns(self, value_columns: Index | None): width = len(value_columns) propagator = np.repeat(np.arange(width), stride) - new_levels: FrozenList | list[Index] + new_levels: tuple | list[Index] if isinstance(value_columns, MultiIndex): - # error: Cannot determine type of "__add__" [has-type] - new_levels = value_columns.levels + ( # type: ignore[has-type] - self.removed_level_full, - ) + new_levels = value_columns.levels + (self.removed_level_full,) new_names = value_columns.names + (self.removed_name,) new_codes = [lab.take(propagator) for lab in value_columns.codes] diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 71d6f9c58e2c2..3a63523b6e501 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3509,7 +3509,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame: from pandas import MultiIndex - index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) + index = MultiIndex.from_tuples(index_list, names=list(arr.index.names) + ["match"]) dtype = _result_dtype(arr) result = arr._constructor_expanddim( diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index becbba703f92c..077e192e8770b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -837,13 +837,17 @@ def _apply_pairwise( result_names = list(result.index.names) else: idx_codes, idx_levels = factorize(result.index) + idx_levels = cast(Index, idx_levels) result_codes = [idx_codes] result_levels = [idx_levels] result_names = [result.index.name] # 3) Create the resulting index by combining 1) + 2) result_codes = groupby_codes + result_codes - result_levels = groupby_levels + result_levels + # error: Incompatible types in assignment + # (expression has type "List[Union[Index, ndarray[Any, Any]]]", + # variable has type "List[Index]") + result_levels = groupby_levels + result_levels # type: ignore[assignment] result_names = self._grouper.names + result_names result_index = MultiIndex( diff --git a/pandas/tests/frame/methods/test_rename_axis.py b/pandas/tests/frame/methods/test_rename_axis.py index dd4a77c6509b8..908a3f728c749 100644 --- a/pandas/tests/frame/methods/test_rename_axis.py +++ b/pandas/tests/frame/methods/test_rename_axis.py @@ -60,15 +60,15 @@ def test_rename_axis_mapper(self): # Test for renaming index using dict result = df.rename_axis(index={"ll": "foo"}) - assert result.index.names == ["foo", "nn"] + assert result.index.names == ("foo", "nn") # Test for renaming index using a function result = df.rename_axis(index=str.upper, axis=0) - assert result.index.names == ["LL", "NN"] + assert result.index.names == ("LL", "NN") # Test for renaming index providing complete list result = df.rename_axis(index=["foo", "goo"]) - assert result.index.names == ["foo", "goo"] + assert result.index.names == ("foo", "goo") # Test for changing index and columns at same time sdf = df.reset_index().set_index("nn").drop(columns=["ll", "y"]) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 5984e591dd6c1..ddb67ebbc4e16 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -130,7 +130,7 @@ def test_set_index_names(self): df = tm.makeDataFrame() df.index.name = "name" - assert df.set_index(df.index).index.names == ["name"] + assert df.set_index(df.index).index.names == ("name",) mi = MultiIndex.from_arrays(df[["A", "B"]].T.values, names=["A", "B"]) mi2 = MultiIndex.from_arrays( @@ -139,7 +139,7 @@ def test_set_index_names(self): df = df.set_index(["A", "B"]) - assert df.set_index(df.index).index.names == ["A", "B"] + assert df.set_index(df.index).index.names == ("A", "B") # Check that set_index isn't converting a MultiIndex into an Index assert isinstance(df.set_index(df.index).index, MultiIndex) @@ -259,7 +259,7 @@ def test_set_index_pass_single_array( # only valid column keys are dropped # since B is always passed as array above, nothing is dropped expected = df.set_index(["B"], drop=False, append=append) - expected.index.names = [index_name] + name if append else name + expected.index.names = [index_name] + list(name) if append else name tm.assert_frame_equal(result, expected) @@ -432,12 +432,12 @@ def test_set_index_datetime(self): df = df.set_index("label", append=True) tm.assert_index_equal(df.index.levels[0], expected) tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) - assert df.index.names == ["datetime", "label"] + assert df.index.names == ("datetime", "label") df = df.swaplevel(0, 1) tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) tm.assert_index_equal(df.index.levels[1], expected) - assert df.index.names == ["label", "datetime"] + assert df.index.names == ("label", "datetime") df = DataFrame(np.random.default_rng(2).random(6)) idx1 = DatetimeIndex( diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index bd7d882f6d94a..ccd8d27566b24 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -874,7 +874,7 @@ def test_sort_index_level_and_column_label( ) # Get index levels from df_idx - levels = df_idx.index.names + levels = list(df_idx.index.names) # Compute expected by sorting on columns and the setting index expected = df_none.sort_values( @@ -892,7 +892,7 @@ def test_sort_column_level_and_index_label( # GH#14353 # Get levels from df_idx - levels = df_idx.index.names + levels = list(df_idx.index.names) # Compute expected by sorting on axis=0, setting index levels, and then # transposing. For some cases this will result in a frame with diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index b54a795af4fdc..edba13840a605 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -783,7 +783,7 @@ def test_unstack_multi_level_cols(self): [[10, 20, 30], [10, 20, 40]], names=["i1", "i2", "i3"] ), ) - assert df.unstack(["i2", "i1"]).columns.names[-2:] == ["i2", "i1"] + assert df.unstack(["i2", "i1"]).columns.names[-2:] == ("i2", "i1") def test_unstack_multi_level_rows_and_cols(self): # PH 28306: Unstack df with multi level cols and rows @@ -1782,7 +1782,7 @@ def test_stack_unstack_preserve_names( unstacked = frame.unstack() assert unstacked.index.name == "first" - assert unstacked.columns.names == ["exp", "second"] + assert unstacked.columns.names == ("exp", "second") restacked = unstacked.stack(future_stack=future_stack) assert restacked.index.names == frame.index.names diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index fc7aa9e7b2c46..8107d837a3d60 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -38,12 +38,12 @@ def test_set_axis_name_mi(self, func): level_names = ["L1", "L2"] result = methodcaller(func, level_names)(df) - assert result.index.names == level_names - assert result.columns.names == [None, None] + assert result.index.names == tuple(level_names) + assert result.columns.names == (None, None) result = methodcaller(func, level_names, axis=1)(df) - assert result.columns.names == ["L1", "L2"] - assert result.index.names == [None, None] + assert result.columns.names == ("L1", "L2") + assert result.index.names == (None, None) def test_nonzero_single_element(self): # allow single item via bool method diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 3648961eb3808..7ea1dba6a9520 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -24,9 +24,9 @@ def test_set_axis_name_mi(self, func): result = methodcaller(func, ["L1", "L2"])(ser) assert ser.index.name is None - assert ser.index.names == ["l1", "l2"] + assert ser.index.names == ("l1", "l2") assert result.index.name is None - assert result.index.names, ["L1", "L2"] + assert result.index.names, ("L1", "L2") def test_set_axis_name_raises(self): ser = Series([1]) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index abcb9f68e0f5c..62c6958cc32d4 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -978,7 +978,7 @@ def test_apply_multi_level_name(category): ).set_index(["A", "B"]) result = df.groupby("B", observed=False).apply(lambda x: x.sum()) tm.assert_frame_equal(result, expected) - assert df.index.names == ["A", "B"] + assert df.index.names == ("A", "B") def test_groupby_apply_datetime_result_dtypes(): diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index efe7b171d630d..b0fa83c27cb67 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -499,5 +499,5 @@ def test_groupby_quantile_nonmulti_levels_order(): tm.assert_series_equal(result, expected) # We need to check that index levels are not sorted - expected_levels = pd.core.indexes.frozen.FrozenList([["B", "A"], [0.2, 0.8]]) + expected_levels = (["B", "A"], [0.2, 0.8]) tm.assert_equal(result.index.levels, expected_levels) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 944dda8977882..9c890fea1abd7 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -109,7 +109,7 @@ def rebuild_index(df): gr = df.groupby(keys, sort=isort) right = gr["3rd"].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1] + ["3rd"] + right.index.names = tuple(list(right.index.names[:-1]) + ["3rd"]) # https://github.com/pandas-dev/pandas/issues/49909 right = right.rename(name) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 3cc4fa4713831..495d5cd90601a 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -50,7 +50,7 @@ def idx_dup(): def index_names(): # names that match those in the idx fixture for testing equality of # names assigned to the idx - return ["first", "second"] + return ("first", "second") @pytest.fixture diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 29908537fbe59..c993f425fa132 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -11,7 +11,7 @@ def test_astype(idx): actual = idx.astype("O") tm.assert_copy(actual.levels, expected.levels) tm.assert_copy(actual.codes, expected.codes) - assert actual.names == list(expected.names) + assert actual.names == expected.names with pytest.raises(TypeError, match="^Setting.*dtype.*object"): idx.astype(np.dtype(int)) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 91ec1b2475cde..cd8b1c93d8459 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -29,7 +29,7 @@ def test_constructor_single_level(): assert isinstance(result, MultiIndex) expected = Index(["foo", "bar", "baz", "qux"], name="first") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ["first"] + assert result.names == ("first",) def test_constructor_no_levels(): @@ -279,7 +279,7 @@ def test_from_arrays_empty(): assert isinstance(result, MultiIndex) expected = Index([], name="A") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ["A"] + assert result.names == ("A",) # N levels for N in [2, 3]: @@ -427,7 +427,7 @@ def test_from_product_empty_one_level(): result = MultiIndex.from_product([[]], names=["A"]) expected = Index([], name="A") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ["A"] + assert result.names == ("A",) @pytest.mark.parametrize( @@ -716,7 +716,7 @@ def test_from_frame_dtype_fidelity(): @pytest.mark.parametrize( - "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])] + "names_in,names_out", [(None, (("L1", "x"), ("L2", "y"))), (["x", "y"], ("x", "y"))] ) def test_from_frame_valid_names(names_in, names_out): # GH 22420 @@ -816,13 +816,13 @@ def test_constructor_with_tz(): result = MultiIndex.from_arrays([index, columns]) - assert result.names == ["dt1", "dt2"] + assert result.names == ("dt1", "dt2") tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) result = MultiIndex.from_arrays([Series(index), Series(columns)]) - assert result.names == ["dt1", "dt2"] + assert result.names == ("dt1", "dt2") tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 2e09a580f9528..14d327093500e 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -70,7 +70,7 @@ def test_copy_method(deep): @pytest.mark.parametrize( "kwarg, value", [ - ("names", ["third", "fourth"]), + ("names", ("third", "fourth")), ], ) def test_copy_method_kwargs(deep, kwarg, value): diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index ee1edaa27f804..93067789d57a1 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -93,7 +93,7 @@ def test_duplicate_multiindex_codes(): mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]]) -@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]]) +@pytest.mark.parametrize("names", [("a", "b", "a"), (1, 1, 2), (1, "a", 1)]) def test_duplicate_level_names(names): # GH18872, GH19029 mi = MultiIndex.from_product([[0, 1]] * 3, names=names) diff --git a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py index 011f61fac90e8..bdfacd87d02be 100644 --- a/pandas/tests/indexes/multi/test_formats.py +++ b/pandas/tests/indexes/multi/test_formats.py @@ -90,14 +90,14 @@ def test_repr_max_seq_items_equal_to_n(self, idx): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - names=['first', 'second'])""" + names=('first', 'second'))""" assert result == expected def test_repr(self, idx): result = idx[:1].__repr__() expected = """\ MultiIndex([('foo', 'one')], - names=['first', 'second'])""" + names=('first', 'second'))""" assert result == expected result = idx.__repr__() @@ -108,7 +108,7 @@ def test_repr(self, idx): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - names=['first', 'second'])""" + names=('first', 'second'))""" assert result == expected with pd.option_context("display.max_seq_items", 5): @@ -119,7 +119,7 @@ def test_repr(self, idx): ... ('qux', 'one'), ('qux', 'two')], - names=['first', 'second'], length=6)""" + names=('first', 'second'), length=6)""" assert result == expected # display.max_seq_items == 1 @@ -128,7 +128,7 @@ def test_repr(self, idx): expected = """\ MultiIndex([... ('qux', 'two')], - names=['first', ...], length=6)""" + names=('first', ...), length=6)""" assert result == expected def test_rjust(self, narrow_multi_index): @@ -136,7 +136,7 @@ def test_rjust(self, narrow_multi_index): result = mi[:1].__repr__() expected = """\ MultiIndex([('a', 9, '2000-01-01 00:00:00')], - names=['a', 'b', 'dti'])""" + names=('a', 'b', 'dti'))""" assert result == expected result = mi[::500].__repr__() @@ -145,7 +145,7 @@ def test_rjust(self, narrow_multi_index): ( 'a', 9, '2000-01-01 00:08:20'), ('abc', 10, '2000-01-01 00:16:40'), ('abc', 10, '2000-01-01 00:25:00')], - names=['a', 'b', 'dti'])""" + names=('a', 'b', 'dti'))""" assert result == expected result = mi.__repr__() @@ -171,14 +171,14 @@ def test_rjust(self, narrow_multi_index): ('abc', 10, '2000-01-01 00:33:17'), ('abc', 10, '2000-01-01 00:33:18'), ('abc', 10, '2000-01-01 00:33:19')], - names=['a', 'b', 'dti'], length=2000)""" + names=('a', 'b', 'dti'), length=2000)""" assert result == expected def test_tuple_width(self, wide_multi_index): mi = wide_multi_index result = mi[:1].__repr__() expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], - names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501 + names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'))""" # noqa: E501 assert result == expected result = mi[:10].__repr__() @@ -193,7 +193,7 @@ def test_tuple_width(self, wide_multi_index): ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], - names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'))""" assert result == expected result = mi.__repr__() @@ -219,5 +219,5 @@ def test_tuple_width(self, wide_multi_index): ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], - names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" + names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'), length=2000)""" assert result == expected diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 0720a1e1c648c..4cea48ec0c911 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -98,14 +98,14 @@ def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): def test_set_name_methods(idx, index_names): # so long as these are synonyms, we don't need to test set_names assert idx.rename == idx.set_names - new_names = [name + "SUFFIX" for name in index_names] + new_names = tuple(name + "SUFFIX" for name in index_names) ind = idx.set_names(new_names) assert idx.names == index_names assert ind.names == new_names msg = "Length of names must match number of levels in MultiIndex" with pytest.raises(ValueError, match=msg): ind.set_names(new_names + new_names) - new_names2 = [name + "SUFFIX2" for name in new_names] + new_names2 = tuple(name + "SUFFIX2" for name in new_names) res = ind.set_names(new_names2, inplace=True) assert res is None assert ind.names == new_names2 @@ -113,11 +113,11 @@ def test_set_name_methods(idx, index_names): # set names for specific level (# GH7792) ind = idx.set_names(new_names[0], level=0) assert idx.names == index_names - assert ind.names == [new_names[0], index_names[1]] + assert ind.names == (new_names[0], index_names[1]) res = ind.set_names(new_names2[0], level=0, inplace=True) assert res is None - assert ind.names == [new_names2[0], index_names[1]] + assert ind.names == (new_names2[0], index_names[1]) # set names for multiple levels ind = idx.set_names(new_names, level=[0, 1]) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 45dd484eff4c6..6b0c287acece5 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -207,7 +207,9 @@ def test_can_hold_identifiers(idx): def test_metadata_immutable(idx): levels, codes = idx.levels, idx.codes # shouldn't be able to set at either the top level or base level - mutable_regex = re.compile("does not support mutable operations") + mutable_regex = re.compile( + "does not support mutable operations|does not support item assignment" + ) with pytest.raises(TypeError, match=mutable_regex): levels[0] = levels[0] with pytest.raises(TypeError, match=mutable_regex): diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 8ae643eb3626d..f0dc805ec1939 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -60,20 +60,20 @@ def test_copy_names(): multi_idx1 = multi_idx.copy() assert multi_idx.equals(multi_idx1) - assert multi_idx.names == ["MyName1", "MyName2"] - assert multi_idx1.names == ["MyName1", "MyName2"] + assert multi_idx.names == ("MyName1", "MyName2") + assert multi_idx1.names == ("MyName1", "MyName2") multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"]) assert multi_idx.equals(multi_idx2) - assert multi_idx.names == ["MyName1", "MyName2"] - assert multi_idx2.names == ["NewName1", "NewName2"] + assert multi_idx.names == ("MyName1", "MyName2") + assert multi_idx2.names == ("NewName1", "NewName2") multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"]) assert multi_idx.equals(multi_idx3) - assert multi_idx.names == ["MyName1", "MyName2"] - assert multi_idx3.names == ["NewName1", "NewName2"] + assert multi_idx.names == ("MyName1", "MyName2") + assert multi_idx3.names == ("NewName1", "NewName2") # gh-35592 with pytest.raises(ValueError, match="Length of new names must be 2, got 1"): @@ -85,8 +85,8 @@ def test_copy_names(): def test_names(idx, index_names): # names are assigned in setup - assert index_names == ["first", "second"] - level_names = [level.name for level in idx.levels] + assert index_names == ("first", "second") + level_names = tuple(level.name for level in idx.levels) assert level_names == index_names # setting bad names on existing diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 77a527134b79f..2a7d84d27b969 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -12,13 +12,13 @@ def test_reindex(idx): result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) - assert result.names == ["first", "second"] + assert result.names == ("first", "second") assert [level.name for level in result.levels] == ["first", "second"] result, indexer = idx.reindex(list(idx)) assert isinstance(result, MultiIndex) assert indexer is None - assert result.names == ["first", "second"] + assert result.names == ("first", "second") assert [level.name for level in result.levels] == ["first", "second"] @@ -52,27 +52,27 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): other_dtype = MultiIndex.from_product([[1, 2], [3, 4]]) # list & ndarray cases - assert idx.reindex([])[0].names == [None, None] - assert idx.reindex(np.array([]))[0].names == [None, None] - assert idx.reindex(target.tolist())[0].names == [None, None] - assert idx.reindex(target.values)[0].names == [None, None] - assert idx.reindex(other_dtype.tolist())[0].names == [None, None] - assert idx.reindex(other_dtype.values)[0].names == [None, None] + assert idx.reindex([])[0].names == (None, None) + assert idx.reindex(np.array([]))[0].names == (None, None) + assert idx.reindex(target.tolist())[0].names == (None, None) + assert idx.reindex(target.values)[0].names == (None, None) + assert idx.reindex(other_dtype.tolist())[0].names == (None, None) + assert idx.reindex(other_dtype.values)[0].names == (None, None) idx.names = ["foo", "bar"] - assert idx.reindex([])[0].names == ["foo", "bar"] - assert idx.reindex(np.array([]))[0].names == ["foo", "bar"] - assert idx.reindex(target.tolist())[0].names == ["foo", "bar"] - assert idx.reindex(target.values)[0].names == ["foo", "bar"] - assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"] - assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"] + assert idx.reindex([])[0].names == ("foo", "bar") + assert idx.reindex(np.array([]))[0].names == ("foo", "bar") + assert idx.reindex(target.tolist())[0].names == ("foo", "bar") + assert idx.reindex(target.values)[0].names == ("foo", "bar") + assert idx.reindex(other_dtype.tolist())[0].names == ("foo", "bar") + assert idx.reindex(other_dtype.values)[0].names == ("foo", "bar") def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): # GH7774 idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"]) - assert idx.reindex([], level=0)[0].names == ["foo", "bar"] - assert idx.reindex([], level=1)[0].names == ["foo", "bar"] + assert idx.reindex([], level=0)[0].names == ("foo", "bar") + assert idx.reindex([], level=1)[0].names == ("foo", "bar") def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 06dbb33aadf97..1bf91a09ee754 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -23,7 +23,7 @@ def test_insert(idx): exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") tm.assert_index_equal(new_index.levels[0], exp0) - assert new_index.names == ["first", "second"] + assert new_index.names == ("first", "second") exp1 = Index(list(idx.levels[1]) + ["three"], name="second") tm.assert_index_equal(new_index.levels[1], exp1) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index c951403fb2654..efb4c88b37f98 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -121,7 +121,7 @@ def test_multiindex_symmetric_difference(): idx2 = idx.copy().rename(["A", "B"]) result = idx.symmetric_difference(idx2) - assert result.names == [None, None] + assert result.names == (None, None) def test_empty(idx): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 08c1a4092952c..7ed4d39e99f99 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -16,7 +16,6 @@ Timestamp, ) import pandas._testing as tm -from pandas.core.indexes.frozen import FrozenList def test_sortlevel(idx): @@ -289,7 +288,7 @@ def test_remove_unused_levels_with_nan(): idx = idx.set_levels(["a", np.nan], level="id1") idx = idx.remove_unused_levels() result = idx.levels - expected = FrozenList([["a", np.nan], [4]]) + expected = (Index(["a", np.nan], name="id1"), Index([4], name="id2")) assert str(result) == str(expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index bc04c1c6612f4..9b42c57d0a18f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -899,7 +899,7 @@ def test_isin_level_kwarg_bad_level_raises(self, index): @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan]) def test_isin_level_kwarg_bad_label_raises(self, label, index): if isinstance(index, MultiIndex): - index = index.rename(["foo", "bar"] + index.names[2:]) + index = index.rename(["foo", "bar"] + list(index.names[2:])) msg = f"'Level {label} not found'" else: index = index.rename("foo") diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 6245a129afedc..a01ed606f5f51 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -120,7 +120,7 @@ def test_set_name_methods(self, index_flat): # should return None assert res is None assert index.name == new_name - assert index.names == [new_name] + assert index.names == (new_name,) with pytest.raises(ValueError, match="Level must be None"): index.set_names("a", level=0) @@ -128,7 +128,7 @@ def test_set_name_methods(self, index_flat): name = ("A", "B") index.rename(name, inplace=True) assert index.name == name - assert index.names == [name] + assert index.names == (name,) @pytest.mark.xfail def test_set_names_single_label_no_level(self, index_flat): diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py deleted file mode 100644 index ace66b5b06a51..0000000000000 --- a/pandas/tests/indexes/test_frozen.py +++ /dev/null @@ -1,113 +0,0 @@ -import re - -import pytest - -from pandas.core.indexes.frozen import FrozenList - - -@pytest.fixture -def lst(): - return [1, 2, 3, 4, 5] - - -@pytest.fixture -def container(lst): - return FrozenList(lst) - - -@pytest.fixture -def unicode_container(): - return FrozenList(["\u05d0", "\u05d1", "c"]) - - -class TestFrozenList: - def check_mutable_error(self, *args, **kwargs): - # Pass whatever function you normally would to pytest.raises - # (after the Exception kind). - mutable_regex = re.compile("does not support mutable operations") - msg = "'(_s)?re.(SRE_)?Pattern' object is not callable" - with pytest.raises(TypeError, match=msg): - mutable_regex(*args, **kwargs) - - def test_no_mutable_funcs(self, container): - def setitem(): - container[0] = 5 - - self.check_mutable_error(setitem) - - def setslice(): - container[1:2] = 3 - - self.check_mutable_error(setslice) - - def delitem(): - del container[0] - - self.check_mutable_error(delitem) - - def delslice(): - del container[0:3] - - self.check_mutable_error(delslice) - - mutable_methods = ("extend", "pop", "remove", "insert") - - for meth in mutable_methods: - self.check_mutable_error(getattr(container, meth)) - - def test_slicing_maintains_type(self, container, lst): - result = container[1:2] - expected = lst[1:2] - self.check_result(result, expected) - - def check_result(self, result, expected): - assert isinstance(result, FrozenList) - assert result == expected - - def test_string_methods_dont_fail(self, container): - repr(container) - str(container) - bytes(container) - - def test_tricky_container(self, unicode_container): - repr(unicode_container) - str(unicode_container) - - def test_add(self, container, lst): - result = container + (1, 2, 3) - expected = FrozenList(lst + [1, 2, 3]) - self.check_result(result, expected) - - result = (1, 2, 3) + container - expected = FrozenList([1, 2, 3] + lst) - self.check_result(result, expected) - - def test_iadd(self, container, lst): - q = r = container - - q += [5] - self.check_result(q, lst + [5]) - - # Other shouldn't be mutated. - self.check_result(r, lst) - - def test_union(self, container, lst): - result = container.union((1, 2, 3)) - expected = FrozenList(lst + [1, 2, 3]) - self.check_result(result, expected) - - def test_difference(self, container): - result = container.difference([2]) - expected = FrozenList([1, 3, 4, 5]) - self.check_result(result, expected) - - def test_difference_dupe(self): - result = FrozenList([1, 2, 3, 2]).difference([2]) - expected = FrozenList([1, 3]) - self.check_result(result, expected) - - def test_tricky_container_to_bytes_raises(self, unicode_container): - # GH 26447 - msg = "^'str' object cannot be interpreted as an integer$" - with pytest.raises(TypeError, match=msg): - bytes(unicode_container) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index de989ad550f2b..7928a1435a660 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -161,7 +161,7 @@ def test_getitem_intkey_leading_level( # GH#33355 dont fall-back to positional when leading level is int ymd = multiindex_year_month_day_dataframe_random_data levels = ymd.index.levels - ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) + ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + list(levels[1:])) ser = ymd["A"] mi = ser.index assert isinstance(mi, MultiIndex) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index fc2edc7559a48..8c04e3094df89 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -112,13 +112,13 @@ def test_multiindex(self, df_schema): {"name": "C", "type": "datetime"}, {"name": "D", "type": "duration"}, ], - "primaryKey": ["level_0", "level_1"], + "primaryKey": ("level_0", "level_1"), } assert result == expected df.index.names = ["idx0", None] expected["fields"][0]["name"] = "idx0" - expected["primaryKey"] = ["idx0", "level_1"] + expected["primaryKey"] = ("idx0", "level_1") result = build_table_schema(df, version=False) assert result == expected @@ -585,21 +585,21 @@ def test_categorical(self): (pd.Index([1], name="myname"), "myname", "name"), ( pd.MultiIndex.from_product([("a", "b"), ("c", "d")]), - ["level_0", "level_1"], + ("level_0", "level_1"), "names", ), ( pd.MultiIndex.from_product( [("a", "b"), ("c", "d")], names=["n1", "n2"] ), - ["n1", "n2"], + ("n1", "n2"), "names", ), ( pd.MultiIndex.from_product( [("a", "b"), ("c", "d")], names=["n1", None] ), - ["n1", "level_1"], + ("n1", "level_1"), "names", ), ], diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 0ff84bcf136cd..339ab70cfecba 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -919,7 +919,7 @@ def test_columns_multiindex_modified(tmp_path, setup_path): df.index.name = "letters" df = df.set_index(keys="E", append=True) - data_columns = df.index.names + df.columns.tolist() + data_columns = list(df.index.names) + df.columns.tolist() path = tmp_path / setup_path df.to_hdf( path, diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index bbdb22955297e..afd5c13756443 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1967,15 +1967,15 @@ def test_read_table_index_col(self, test_frame1): sql.to_sql(test_frame1, "test_frame", self.conn) result = sql.read_sql_table("test_frame", self.conn, index_col="index") - assert result.index.names == ["index"] + assert result.index.names == ("index",) result = sql.read_sql_table("test_frame", self.conn, index_col=["A", "B"]) - assert result.index.names == ["A", "B"] + assert result.index.names == ("A", "B") result = sql.read_sql_table( "test_frame", self.conn, index_col=["A", "B"], columns=["C", "D"] ) - assert result.index.names == ["A", "B"] + assert result.index.names == ("A", "B") assert result.columns.tolist() == ["C", "D"] def test_read_sql_delegate(self): diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 5dde863f246d1..c5b5024c11b00 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -142,7 +142,7 @@ def test_concat_keys_specific_levels(self): tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) - assert result.columns.names == ["group_key", None] + assert result.columns.names == ("group_key", None) @pytest.mark.parametrize("mapping", ["mapping", "dict"]) def test_concat_mapping(self, mapping, non_dict_mapping_subclass): diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 2b6ebded3d325..d561baf3b87c8 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -135,7 +135,7 @@ def test_crosstab_margins(self): result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) assert result.index.names == ("a",) - assert result.columns.names == ["b", "c"] + assert result.columns.names == ("b", "c") all_cols = result["All", ""] exp_cols = df.groupby(["a"]).size().astype("i8") @@ -173,7 +173,7 @@ def test_crosstab_margins_set_margin_name(self): ) assert result.index.names == ("a",) - assert result.columns.names == ["b", "c"] + assert result.columns.names == ("b", "c") all_cols = result["TOTAL", ""] exp_cols = df.groupby(["a"]).size().astype("i8") diff --git a/pandas/tests/series/methods/test_rename_axis.py b/pandas/tests/series/methods/test_rename_axis.py index 58c095d697ede..60175242a06b5 100644 --- a/pandas/tests/series/methods/test_rename_axis.py +++ b/pandas/tests/series/methods/test_rename_axis.py @@ -15,13 +15,13 @@ def test_rename_axis_mapper(self): ser = Series(list(range(len(mi))), index=mi) result = ser.rename_axis(index={"ll": "foo"}) - assert result.index.names == ["foo", "nn"] + assert result.index.names == ("foo", "nn") result = ser.rename_axis(index=str.upper, axis=0) - assert result.index.names == ["LL", "NN"] + assert result.index.names == ("LL", "NN") result = ser.rename_axis(index=["foo", "goo"]) - assert result.index.names == ["foo", "goo"] + assert result.index.names == ("foo", "goo") with pytest.raises(TypeError, match="unexpected"): ser.rename_axis(columns="wrong") diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index e8a1c961c8cb6..32406cc9956d0 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -205,18 +205,6 @@ class MyList(list): val = MyList([True]) assert com.is_bool_indexer(val) - def test_frozenlist(self): - # GH#42461 - data = {"col1": [1, 2], "col2": [3, 4]} - df = pd.DataFrame(data=data) - - frozen = df.index.names[1:] - assert not com.is_bool_indexer(frozen) - - result = df[frozen] - expected = df[[]] - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("with_exception", [True, False]) def test_temp_setattr(with_exception): diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 15263db9ec645..9197b35406e87 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -198,8 +198,8 @@ def test_index_equal_names(name1, name2): msg = f"""Index are different Attribute "names" are different -\\[left\\]: \\[{name1}\\] -\\[right\\]: \\[{name2}\\]""" +\\[left\\]: \\({name1},\\) +\\[right\\]: \\({name2},\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 3fe922539780d..ddc4f4e6c070b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -591,7 +591,7 @@ def test_multi_index_names(): result = df.rolling(3).cov() tm.assert_index_equal(result.columns, df.columns) - assert result.index.names == [None, "1", "2"] + assert result.index.names == (None, "1", "2") def test_rolling_axis_sum(axis_frame):