pandas/tests/indexes/test_common.py

"""
Collection of tests asserting things that should be true for
any index subclass except for MultiIndex. Makes use of the `index_flat`
fixture defined in pandas/conftest.py.
"""
from copy import (
    copy,
    deepcopy,
)
import re

import numpy as np
import pytest

from pandas.compat import IS64
from pandas.compat.numpy import np_version_gte1p25

from pandas.core.dtypes.common import (
    is_integer_dtype,
    is_numeric_dtype,
)

import pandas as pd
from pandas import (
    CategoricalIndex,
    MultiIndex,
    PeriodIndex,
    RangeIndex,
)
import pandas._testing as tm


class TestCommon:
    @pytest.mark.parametrize("name", [None, "new_name"])
    def test_to_frame(self, name, index_flat, using_copy_on_write):
        # see GH#15230, GH#22580
        idx = index_flat

        if name:
            idx_name = name
        else:
            idx_name = idx.name or 0

        df = idx.to_frame(name=idx_name)

        assert df.index is idx
        assert len(df.columns) == 1
        assert df.columns[0] == idx_name
        if not using_copy_on_write:
            assert df[idx_name].values is not idx.values

        df = idx.to_frame(index=False, name=idx_name)
        assert df.index is not idx

    def test_droplevel(self, index_flat):
        # GH 21115
        # MultiIndex is tested separately in test_multi.py
        index = index_flat

        assert index.droplevel([]).equals(index)

        for level in [index.name, [index.name]]:
            if isinstance(index.name, tuple) and level is index.name:
                # GH 21121 : droplevel with tuple name
                continue
            msg = (
                "Cannot remove 1 levels from an index with 1 levels: at least one "
                "level must be left."
            )
            with pytest.raises(ValueError, match=msg):
                index.droplevel(level)

        for level in "wrong", ["wrong"]:
            with pytest.raises(
                KeyError,
                match=r"'Requested level \(wrong\) does not match index name \(None\)'",
            ):
                index.droplevel(level)

    def test_constructor_non_hashable_name(self, index_flat):
        # GH 20527
        index = index_flat

        message = "Index.name must be a hashable type"
        renamed = [["1"]]

        # With .rename()
        with pytest.raises(TypeError, match=message):
            index.rename(name=renamed)

        # With .set_names()
        with pytest.raises(TypeError, match=message):
            index.set_names(names=renamed)

    def test_constructor_unwraps_index(self, index_flat):
        a = index_flat
        # Passing dtype is necessary for Index([True, False], dtype=object)
        #  case.
        b = type(a)(a, dtype=a.dtype)
        tm.assert_equal(a._data, b._data)

    def test_to_flat_index(self, index_flat):
        # 22866
        index = index_flat

        result = index.to_flat_index()
        tm.assert_index_equal(result, index)

    def test_set_name_methods(self, index_flat):
        # MultiIndex tested separately
        index = index_flat
        new_name = "This is the new name for this index"

        original_name = index.name
        new_ind = index.set_names([new_name])
        assert new_ind.name == new_name
        assert index.name == original_name
        res = index.rename(new_name, inplace=True)

        # should return None
        assert res is None
        assert index.name == new_name
        assert index.names == [new_name]
        with pytest.raises(ValueError, match="Level must be None"):
            index.set_names("a", level=0)

        # rename in place just leaves tuples and other containers alone
        name = ("A", "B")
        index.rename(name, inplace=True)
        assert index.name == name
        assert index.names == [name]

    @pytest.mark.xfail
    def test_set_names_single_label_no_level(self, index_flat):
        with pytest.raises(TypeError, match="list-like"):
            # should still fail even if it would be the right length
            index_flat.set_names("a")

    def test_copy_and_deepcopy(self, index_flat):
        index = index_flat

        for func in (copy, deepcopy):
            idx_copy = func(index)
            assert idx_copy is not index
            assert idx_copy.equals(index)

        new_copy = index.copy(deep=True, name="banana")
        assert new_copy.name == "banana"

    def test_copy_name(self, index_flat):
        # GH#12309: Check that the "name" argument
        # passed at initialization is honored.
        index = index_flat

        first = type(index)(index, copy=True, name="mario")
        second = type(first)(first, copy=False)

        # Even though "copy=False", we want a new object.
        assert first is not second
        tm.assert_index_equal(first, second)

        # Not using tm.assert_index_equal() since names differ.
        assert index.equals(first)

        assert first.name == "mario"
        assert second.name == "mario"

        # TODO: belongs in series arithmetic tests?
        s1 = pd.Series(2, index=first)
        s2 = pd.Series(3, index=second[:-1])
        # See GH#13365
        s3 = s1 * s2
        assert s3.index.name == "mario"

    def test_copy_name2(self, index_flat):
        # GH#35592
        index = index_flat

        assert index.copy(name="mario").name == "mario"

        with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
            index.copy(name=["mario", "luigi"])

        msg = f"{type(index).__name__}.name must be a hashable type"
        with pytest.raises(TypeError, match=msg):
            index.copy(name=[["mario"]])

    def test_unique_level(self, index_flat):
        # don't test a MultiIndex here (as its tested separated)
        index = index_flat

        # GH 17896
        expected = index.drop_duplicates()
        for level in [0, index.name, None]:
            result = index.unique(level=level)
            tm.assert_index_equal(result, expected)

        msg = "Too many levels: Index has only 1 level, not 4"
        with pytest.raises(IndexError, match=msg):
            index.unique(level=3)

        msg = (
            rf"Requested level \(wrong\) does not match index name "
            rf"\({re.escape(index.name.__repr__())}\)"
        )
        with pytest.raises(KeyError, match=msg):
            index.unique(level="wrong")

    def test_unique(self, index_flat):
        # MultiIndex tested separately
        index = index_flat
        if not len(index):
            pytest.skip("Skip check for empty Index and MultiIndex")

        idx = index[[0] * 5]
        idx_unique = index[[0]]

        # We test against `idx_unique`, so first we make sure it's unique
        # and doesn't contain nans.
        assert idx_unique.is_unique is True
        try:
            assert idx_unique.hasnans is False
        except NotImplementedError:
            pass

        result = idx.unique()
        tm.assert_index_equal(result, idx_unique)

        # nans:
        if not index._can_hold_na:
            pytest.skip("Skip na-check if index cannot hold na")

        vals = index._values[[0] * 5]
        vals[0] = np.nan

        vals_unique = vals[:2]
        idx_nan = index._shallow_copy(vals)
        idx_unique_nan = index._shallow_copy(vals_unique)
        assert idx_unique_nan.is_unique is True

        assert idx_nan.dtype == index.dtype
        assert idx_unique_nan.dtype == index.dtype

        expected = idx_unique_nan
        for pos, i in enumerate([idx_nan, idx_unique_nan]):
            result = i.unique()
            tm.assert_index_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:Period with BDay freq:FutureWarning")
    @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
    def test_searchsorted_monotonic(self, index_flat, request):
        # GH17271
        index = index_flat
        # not implemented for tuple searches in MultiIndex
        # or Intervals searches in IntervalIndex
        if isinstance(index, pd.IntervalIndex):
            mark = pytest.mark.xfail(
                reason="IntervalIndex.searchsorted does not support Interval arg",
                raises=NotImplementedError,
            )
            request.node.add_marker(mark)

        # nothing to test if the index is empty
        if index.empty:
            pytest.skip("Skip check for empty Index")
        value = index[0]

        # determine the expected results (handle dupes for 'right')
        expected_left, expected_right = 0, (index == value).argmin()
        if expected_right == 0:
            # all values are the same, expected_right should be length
            expected_right = len(index)

        # test _searchsorted_monotonic in all cases
        # test searchsorted only for increasing
        if index.is_monotonic_increasing:
            ssm_left = index._searchsorted_monotonic(value, side="left")
            assert expected_left == ssm_left

            ssm_right = index._searchsorted_monotonic(value, side="right")
            assert expected_right == ssm_right

            ss_left = index.searchsorted(value, side="left")
            assert expected_left == ss_left

            ss_right = index.searchsorted(value, side="right")
            assert expected_right == ss_right

        elif index.is_monotonic_decreasing:
            ssm_left = index._searchsorted_monotonic(value, side="left")
            assert expected_left == ssm_left

            ssm_right = index._searchsorted_monotonic(value, side="right")
            assert expected_right == ssm_right
        else:
            # non-monotonic should raise.
            msg = "index must be monotonic increasing or decreasing"
            with pytest.raises(ValueError, match=msg):
                index._searchsorted_monotonic(value, side="left")

    @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
    def test_drop_duplicates(self, index_flat, keep):
        # MultiIndex is tested separately
        index = index_flat
        if isinstance(index, RangeIndex):
            pytest.skip(
                "RangeIndex is tested in test_drop_duplicates_no_duplicates "
                "as it cannot hold duplicates"
            )
        if len(index) == 0:
            pytest.skip(
                "empty index is tested in test_drop_duplicates_no_duplicates "
                "as it cannot hold duplicates"
            )

        # make unique index
        holder = type(index)
        unique_values = list(set(index))
        dtype = index.dtype if is_numeric_dtype(index) else None
        unique_idx = holder(unique_values, dtype=dtype)

        # make duplicated index
        n = len(unique_idx)
        duplicated_selection = np.random.default_rng(2).choice(n, int(n * 1.5))
        idx = holder(unique_idx.values[duplicated_selection])

        # Series.duplicated is tested separately
        expected_duplicated = (
            pd.Series(duplicated_selection).duplicated(keep=keep).values
        )
        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)

        # Series.drop_duplicates is tested separately
        expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
        tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)

    @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
    def test_drop_duplicates_no_duplicates(self, index_flat):
        # MultiIndex is tested separately
        index = index_flat

        # make unique index
        if isinstance(index, RangeIndex):
            # RangeIndex cannot have duplicates
            unique_idx = index
        else:
            holder = type(index)
            unique_values = list(set(index))
            dtype = index.dtype if is_numeric_dtype(index) else None
            unique_idx = holder(unique_values, dtype=dtype)

        # check on unique index
        expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
        tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
        result_dropped = unique_idx.drop_duplicates()
        tm.assert_index_equal(result_dropped, unique_idx)
        # validate shallow copy
        assert result_dropped is not unique_idx

    def test_drop_duplicates_inplace(self, index):
        msg = r"drop_duplicates\(\) got an unexpected keyword argument"
        with pytest.raises(TypeError, match=msg):
            index.drop_duplicates(inplace=True)

    @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
    def test_has_duplicates(self, index_flat):
        # MultiIndex tested separately in:
        #   tests/indexes/multi/test_unique_and_duplicates.
        index = index_flat
        holder = type(index)
        if not len(index) or isinstance(index, RangeIndex):
            # MultiIndex tested separately in:
            #   tests/indexes/multi/test_unique_and_duplicates.
            # RangeIndex is unique by definition.
            pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")

        idx = holder([index[0]] * 5)
        assert idx.is_unique is False
        assert idx.has_duplicates is True

    @pytest.mark.parametrize(
        "dtype",
        ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
    )
    def test_astype_preserves_name(self, index, dtype):
        # https://github.com/pandas-dev/pandas/issues/32013
        if isinstance(index, MultiIndex):
            index.names = ["idx" + str(i) for i in range(index.nlevels)]
        else:
            index.name = "idx"

        warn = None
        if index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]:
            # imaginary components discarded
            if np_version_gte1p25:
                warn = np.exceptions.ComplexWarning
            else:
                warn = np.ComplexWarning

        is_pyarrow_str = str(index.dtype) == "string[pyarrow]" and dtype == "category"
        try:
            # Some of these conversions cannot succeed so we use a try / except
            with tm.assert_produces_warning(
                warn,
                raise_on_extra_warnings=is_pyarrow_str,
                check_stacklevel=False,
            ):
                result = index.astype(dtype)
        except (ValueError, TypeError, NotImplementedError, SystemError):
            return

        if isinstance(index, MultiIndex):
            assert result.names == index.names
        else:
            assert result.name == index.name

    def test_hasnans_isnans(self, index_flat):
        # GH#11343, added tests for hasnans / isnans
        index = index_flat

        # cases in indices doesn't include NaN
        idx = index.copy(deep=True)
        expected = np.array([False] * len(idx), dtype=bool)
        tm.assert_numpy_array_equal(idx._isnan, expected)
        assert idx.hasnans is False

        idx = index.copy(deep=True)
        values = idx._values

        if len(index) == 0:
            return
        elif is_integer_dtype(index.dtype):
            return
        elif index.dtype == bool:
            # values[1] = np.nan below casts to True!
            return

        values[1] = np.nan

        idx = type(index)(values)

        expected = np.array([False] * len(idx), dtype=bool)
        expected[1] = True
        tm.assert_numpy_array_equal(idx._isnan, expected)
        assert idx.hasnans is True


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("na_position", [None, "middle"])
def test_sort_values_invalid_na_position(index_with_missing, na_position):
    with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
        index_with_missing.sort_values(na_position=na_position)


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("na_position", ["first", "last"])
def test_sort_values_with_missing(index_with_missing, na_position, request):
    # GH 35584. Test that sort_values works with missing values,
    # sort non-missing and place missing according to na_position

    if isinstance(index_with_missing, CategoricalIndex):
        request.node.add_marker(
            pytest.mark.xfail(
                reason="missing value sorting order not well-defined", strict=False
            )
        )

    missing_count = np.sum(index_with_missing.isna())
    not_na_vals = index_with_missing[index_with_missing.notna()].values
    sorted_values = np.sort(not_na_vals)
    if na_position == "first":
        sorted_values = np.concatenate([[None] * missing_count, sorted_values])
    else:
        sorted_values = np.concatenate([sorted_values, [None] * missing_count])

    # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
    expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)

    result = index_with_missing.sort_values(na_position=na_position)
    tm.assert_index_equal(result, expected)


def test_ndarray_compat_properties(index):
    if isinstance(index, PeriodIndex) and not IS64:
        pytest.skip("Overflow")
    idx = index
    assert idx.T.equals(idx)
    assert idx.transpose().equals(idx)

    values = idx.values

    assert idx.shape == values.shape
    assert idx.ndim == values.ndim
    assert idx.size == values.size

    if not isinstance(index, (RangeIndex, MultiIndex)):
        # These two are not backed by an ndarray
        assert idx.nbytes == values.nbytes

    # test for validity
    idx.nbytes
    idx.values.nbytes