TYP: MultiIndex #39729

Merged · 1 commit · Feb 10, 2021
5 changes: 4 additions & 1 deletion pandas/core/frame.py
@@ -4821,7 +4821,10 @@ def set_index(
names.extend(col.names)
elif isinstance(col, (Index, Series)):
# if Index then not MultiIndex (treated above)
arrays.append(col)

# error: Argument 1 to "append" of "list" has incompatible
# type "Union[Index, Series]"; expected "Index" [arg-type]
arrays.append(col) # type:ignore[arg-type]
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
arrays.append(col)
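For context on the ignore above: `arrays` is typed as holding `Index` objects (that is what mypy's "expected" type says), while `col` is only narrowed to `Union[Index, Series]` in this branch, even though appending a `Series` works at runtime. A stripped-down sketch of the same situation, using hypothetical names rather than the real `set_index` internals:

```python
from typing import List, Union

from pandas import Index, Series


def collect(cols: List[Union[Index, Series]]) -> List[Index]:
    # Hypothetical container, annotated like the one mypy complains about.
    arrays: List[Index] = []
    for col in cols:
        # mypy: Argument 1 to "append" of "list" has incompatible type
        # "Union[Index, Series]"; expected "Index"  [arg-type]
        arrays.append(col)  # type: ignore[arg-type]
    return arrays
```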
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
@@ -5766,7 +5766,7 @@ def insert(self, loc: int, item):
idx = np.concatenate((arr[:loc], item, arr[loc:]))
return Index(idx, name=self.name)

def drop(self, labels, errors: str_t = "raise"):
def drop(self: _IndexT, labels, errors: str_t = "raise") -> _IndexT:
"""
Make new Index with passed list of labels deleted.

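The `_IndexT` annotation on `drop` is the self-type pattern: `_IndexT` is a TypeVar bound to `Index` (defined elsewhere in this module), so calling `drop` on a subclass is now typed as returning that subclass rather than a bare `Index`. A minimal sketch of the idea with toy classes, not the pandas implementation:

```python
from typing import List, TypeVar

_IndexT = TypeVar("_IndexT", bound="Index")


class Index:
    def drop(self: _IndexT, labels: List[str]) -> _IndexT:
        # type(self) keeps the concrete subclass at runtime; the
        # self-typed TypeVar lets the checker track that statically.
        return type(self)()


class DatetimeIndex(Index):
    pass


idx = DatetimeIndex().drop(["a"])  # inferred as DatetimeIndex, not Index
```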
78 changes: 44 additions & 34 deletions pandas/core/indexes/multi.py
@@ -13,6 +13,7 @@
Sequence,
Tuple,
Union,
cast,
)
import warnings

@@ -71,7 +72,7 @@
)

if TYPE_CHECKING:
from pandas import Series
from pandas import CategoricalIndex, Series

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
@@ -478,7 +479,7 @@ def from_tuples(
tuples,
sortorder: Optional[int] = None,
names: Optional[Sequence[Hashable]] = None,
):
) -> MultiIndex:
"""
Convert list of tuples to MultiIndex.

@@ -537,7 +538,9 @@ def from_tuples(
return cls.from_arrays(arrays, sortorder=sortorder, names=names)

@classmethod
def from_product(cls, iterables, sortorder=None, names=lib.no_default):
def from_product(
cls, iterables, sortorder=None, names=lib.no_default
) -> MultiIndex:
"""
Make a MultiIndex from the cartesian product of multiple iterables.

@@ -596,7 +599,7 @@ def from_product(cls, iterables, sortorder=None, names=lib.no_default):
return cls(levels, codes, sortorder=sortorder, names=names)

@classmethod
def from_frame(cls, df, sortorder=None, names=None):
def from_frame(cls, df, sortorder=None, names=None) -> MultiIndex:
"""
Make a MultiIndex from a DataFrame.
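All three constructors touched above (`from_tuples`, `from_product`, `from_frame`) already return a `MultiIndex` at runtime; the new annotations just expose that to type checkers instead of an implicit `Any`. For example:

```python
import pandas as pd

mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["letter", "number"])
# With the return annotation, a checker knows `mi` is a MultiIndex,
# so attributes like `mi.levels` type-check without a cast.
print(mi.levels)
```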

@@ -664,14 +667,15 @@
# --------------------------------------------------------------------

@cache_readonly
def _values(self):
def _values(self) -> np.ndarray:
# We override here, since our parent uses _data, which we don't use.
values = []

for i in range(self.nlevels):
vals = self._get_level_values(i)
if is_categorical_dtype(vals.dtype):
vals = vals._internal_get_values()
vals = cast("CategoricalIndex", vals)
vals = vals._data._internal_get_values()
if isinstance(vals.dtype, ExtensionDtype) or isinstance(
vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
):
@@ -683,7 +687,7 @@ def _values(self):
return arr

@property
def values(self):
def values(self) -> np.ndarray:
return self._values

@property
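The `cast("CategoricalIndex", vals)` added in `_values` above is a no-op at runtime; it only narrows the static type, because the `is_categorical_dtype(...)` check is not something mypy can see through. A rough analogue of the pattern, outside the pandas internals:

```python
from typing import Optional, cast

from pandas import CategoricalIndex, Index
from pandas.api.types import is_categorical_dtype


def categories_of(vals: Index) -> Optional[Index]:
    if is_categorical_dtype(vals.dtype):
        # The dtype check does not narrow the annotation, so without the
        # cast a checker would still treat `vals` as a plain Index here.
        vals = cast(CategoricalIndex, vals)
        return vals.categories
    return None


print(categories_of(CategoricalIndex(["a", "b", "a"])))
```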
@@ -782,7 +786,9 @@ def _set_levels(

self._reset_cache()

def set_levels(self, levels, level=None, inplace=None, verify_integrity=True):
def set_levels(
self, levels, level=None, inplace=None, verify_integrity: bool = True
):
"""
Set new levels on MultiIndex. Defaults to returning new index.

@@ -909,7 +915,7 @@ def nlevels(self) -> int:
return len(self._levels)

@property
def levshape(self):
def levshape(self) -> Shape:
"""
A tuple with the length of each level.

@@ -967,7 +973,7 @@ def _set_codes(

self._reset_cache()

def set_codes(self, codes, level=None, inplace=None, verify_integrity=True):
def set_codes(self, codes, level=None, inplace=None, verify_integrity: bool = True):
"""
Set new codes on MultiIndex. Defaults to returning new index.

@@ -985,7 +991,7 @@ def set_codes(self, codes, level=None, inplace=None, verify_integrity=True):
If True, mutates in place.

.. deprecated:: 1.2.0
verify_integrity : bool (default True)
verify_integrity : bool, default True
If True, checks that levels and codes are compatible.

Returns
@@ -1080,12 +1086,12 @@ def _constructor(self):
return type(self).from_tuples

@doc(Index._shallow_copy)
def _shallow_copy(self, values, name=lib.no_default):
def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
names = name if name is not lib.no_default else self.names

return type(self).from_tuples(values, sortorder=None, names=names)

def _view(self: MultiIndex) -> MultiIndex:
def _view(self) -> MultiIndex:
result = type(self)(
levels=self.levels,
codes=self.codes,
@@ -1580,7 +1586,7 @@ def is_monotonic_decreasing(self) -> bool:
return self[::-1].is_monotonic_increasing

@cache_readonly
def _inferred_type_levels(self):
def _inferred_type_levels(self) -> List[str]:
""" return a list of the inferred types, one for each level """
return [i.inferred_type for i in self.levels]

@@ -1598,7 +1604,7 @@ def fillna(self, value=None, downcast=None):
raise NotImplementedError("isna is not defined for MultiIndex")

@doc(Index.dropna)
def dropna(self, how="any"):
def dropna(self, how: str = "any") -> MultiIndex:
nans = [level_codes == -1 for level_codes in self.codes]
if how == "any":
indexer = np.any(nans, axis=0)
@@ -1610,7 +1616,7 @@ def dropna(self, how="any"):
new_codes = [level_codes[~indexer] for level_codes in self.codes]
return self.set_codes(codes=new_codes)

def _get_level_values(self, level, unique=False):
def _get_level_values(self, level: int, unique: bool = False) -> Index:
"""
Return vector of label values for requested level,
equal to the length of the index
@@ -1619,13 +1625,13 @@ def _get_level_values(self, level, unique=False):

Parameters
----------
level : int level
level : int
unique : bool, default False
if True, drop duplicated values

Returns
-------
values : ndarray
Index
"""
lev = self.levels[level]
level_codes = self.codes[level]
@@ -1759,7 +1765,7 @@ def to_frame(self, index=True, name=None):
result.index = self
return result

def to_flat_index(self):
def to_flat_index(self) -> Index:
"""
Convert a MultiIndex to an Index of Tuples containing the level values.

@@ -1862,7 +1868,7 @@ def _lexsort_depth(self) -> int:
return self.sortorder
return _lexsort_depth(self.codes, self.nlevels)

def _sort_levels_monotonic(self):
def _sort_levels_monotonic(self) -> MultiIndex:
"""
This is an *internal* function.

@@ -1929,7 +1935,7 @@ def _sort_levels_monotonic(self):
verify_integrity=False,
)

def remove_unused_levels(self):
def remove_unused_levels(self) -> MultiIndex:
"""
Create new MultiIndex from current that removes unused levels.

@@ -2065,7 +2071,9 @@ def __getitem__(self, key):
)

@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
def take(
self: MultiIndex, indices, axis=0, allow_fill=True, fill_value=None, **kwargs
) -> MultiIndex:
nv.validate_take((), kwargs)
indices = ensure_platform_int(indices)

@@ -2129,7 +2137,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray:
return self._values.argsort(*args, **kwargs)

@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
def repeat(self, repeats, axis=None):
def repeat(self, repeats: int, axis=None) -> MultiIndex:
nv.validate_repeat((), {"axis": axis})
repeats = ensure_platform_int(repeats)
return MultiIndex(
@@ -2200,7 +2208,7 @@ def drop(self, codes, level=None, errors="raise"):

return self.delete(inds)

def _drop_from_level(self, codes, level, errors="raise"):
def _drop_from_level(self, codes, level, errors="raise") -> MultiIndex:
codes = com.index_labels_to_array(codes)
i = self._get_level_number(level)
index = self.levels[i]
@@ -2219,7 +2227,7 @@ def _drop_from_level(self, codes, level, errors="raise"):

return self[mask]

def swaplevel(self, i=-2, j=-1):
def swaplevel(self, i=-2, j=-1) -> MultiIndex:
"""
Swap level i with level j.

@@ -2277,7 +2285,7 @@ def swaplevel(self, i=-2, j=-1):
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
)

def reorder_levels(self, order):
def reorder_levels(self, order) -> MultiIndex:
"""
Rearrange levels using input order. May not drop or duplicate levels.

@@ -2323,7 +2331,7 @@ def reorder_levels(self, order):
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
)

def _get_codes_for_sorting(self):
def _get_codes_for_sorting(self) -> List[Categorical]:
"""
we are categorizing our codes by using the
available categories (all, not just observed)
@@ -2343,7 +2351,9 @@ def cats(level_codes):
for level_codes in self.codes
]

def sortlevel(self, level=0, ascending=True, sort_remaining=True):
def sortlevel(
self, level=0, ascending: bool = True, sort_remaining: bool = True
) -> Tuple[MultiIndex, np.ndarray]:
"""
Sort MultiIndex at the requested level.
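`sortlevel` returns both the sorted index and the indexer that produced it, which the `Tuple[MultiIndex, np.ndarray]` annotation now spells out:

```python
import pandas as pd

mi = pd.MultiIndex.from_tuples([("b", 2), ("a", 1)], names=["x", "y"])
sorted_mi, indexer = mi.sortlevel(level=0)
print(sorted_mi)  # MultiIndex sorted by level "x": ("a", 1) before ("b", 2)
print(indexer)    # positions used to reorder the original, here [1 0]
```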

@@ -3396,7 +3406,7 @@ def _reorder_indexer(
ind = np.lexsort(keys)
return indexer[ind]

def truncate(self, before=None, after=None):
def truncate(self, before=None, after=None) -> MultiIndex:
"""
Slice index between two labels / tuples, return new MultiIndex

@@ -3517,7 +3527,7 @@ def _union(self, other, sort):
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
return is_object_dtype(dtype)

def _get_reconciled_name_object(self, other):
def _get_reconciled_name_object(self, other) -> MultiIndex:
"""
If the result of a set operation will be self,
return self, unless the names change, in which
@@ -3662,7 +3672,7 @@ def _validate_fill_value(self, item):
raise ValueError("Item must have length equal to number of levels.")
return item

def insert(self, loc: int, item):
def insert(self, loc: int, item) -> MultiIndex:
"""
Make new MultiIndex inserting new item at location

@@ -3702,7 +3712,7 @@ def insert(self, loc: int, item):
levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
)

def delete(self, loc):
def delete(self, loc) -> MultiIndex:
"""
Make new index with passed location deleted

@@ -3719,7 +3729,7 @@ def delete(self, loc):
)

@doc(Index.isin)
def isin(self, values, level=None):
def isin(self, values, level=None) -> np.ndarray:
if level is None:
values = MultiIndex.from_tuples(values, names=self.names)._values
return algos.isin(self._values, values)
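`isin` produces a boolean NumPy mask, which the annotation now states explicitly:

```python
import pandas as pd

mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["k", "v"])
mask = mi.isin([("a", 1)])
print(mask)  # [ True False]
```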
@@ -3800,7 +3810,7 @@ def _get_na_rep(dtype) -> str:
return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN")


def maybe_droplevels(index, key):
def maybe_droplevels(index: Index, key) -> Index:
"""
Attempt to drop level or levels from the given index.

8 changes: 4 additions & 4 deletions pandas/tests/groupby/test_categorical.py
@@ -1639,8 +1639,8 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
val = [0, 1, 1, 0]
df = DataFrame({"a": cat, "b": cat, "c": val})

idx = Categorical([0, 1])
idx = pd.MultiIndex.from_product([idx, idx], names=["a", "b"])
cat2 = Categorical([0, 1])
idx = pd.MultiIndex.from_product([cat2, cat2], names=["a", "b"])
expected_dict = {
"first": Series([0, np.NaN, np.NaN, 1], idx, name="c"),
"last": Series([1, np.NaN, np.NaN, 0], idx, name="c"),
@@ -1664,8 +1664,8 @@ def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
val = [0, 1, 1, 0]
df = DataFrame({"a": cat, "b": cat, "c": val})

idx = Categorical([0, 1])
idx = pd.MultiIndex.from_product([idx, idx], names=["a", "b"])
cat2 = Categorical([0, 1])
idx = pd.MultiIndex.from_product([cat2, cat2], names=["a", "b"])
expected_dict = {
"first": Series([0, np.NaN, np.NaN, 1], idx, name="c"),
"last": Series([1, np.NaN, np.NaN, 0], idx, name="c"),
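The test change only stops reusing one name for two different types: with `from_product` now annotated to return `MultiIndex`, rebinding a variable that was first inferred as `Categorical` would presumably be flagged as an incompatible assignment. The renamed version, reduced to its essentials:

```python
from pandas import Categorical, MultiIndex

# Old pattern, which a checker dislikes once from_product is annotated:
#   idx = Categorical([0, 1])
#   idx = MultiIndex.from_product([idx, idx], ...)  # Categorical rebound to MultiIndex
cat2 = Categorical([0, 1])
idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"])
print(idx)
```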