Skip to content

REF/TST: misplaced MultiIndex tests #32314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Feb 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 74 additions & 1 deletion pandas/tests/indexes/multi/test_constructors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from datetime import date, datetime
import itertools

import numpy as np
import pytest

Expand All @@ -6,7 +9,7 @@
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike

import pandas as pd
from pandas import Index, MultiIndex, date_range
from pandas import Index, MultiIndex, Series, date_range
import pandas._testing as tm


Expand Down Expand Up @@ -723,3 +726,73 @@ def test_index_equal_empty_iterable():
a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
tm.assert_index_equal(a, b)


def test_raise_invalid_sortorder():
    """The MultiIndex constructor raises when given an incorrect sortorder.

    GH#28518
    """
    lvls = [[0, 1], [0, 1, 2]]
    outer_sorted = [0, 0, 0, 1, 1, 1]

    # Correct sortorder: this construction must go through without raising.
    MultiIndex(levels=lvls, codes=[outer_sorted, [0, 1, 2, 0, 1, 2]], sortorder=2)

    # Claimed depth 2, but the error message reports a lexsort depth of 1.
    msg = r".* sortorder 2 with lexsort_depth 1.*"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=lvls, codes=[outer_sorted, [0, 1, 2, 0, 2, 1]], sortorder=2)

    # Claimed depth 1, but the error message reports a lexsort depth of 0.
    msg = r".* sortorder 1 with lexsort_depth 0.*"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(
            levels=lvls, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
        )


def test_datetimeindex():
    """Datetime-like inputs are expected to end up as DatetimeIndex levels."""
    stamps = ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"]
    tokyo = pd.DatetimeIndex(stamps * 2, tz="Asia/Tokyo")
    eastern = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern")
    idx = MultiIndex.from_arrays([tokyo, eastern])

    # The first input repeats its three timestamps, so its level is expected
    # to hold each of them once; the second level must equal its input.
    unique_tokyo = pd.DatetimeIndex(stamps, tz="Asia/Tokyo")
    tm.assert_index_equal(idx.levels[0], unique_tokyo)
    tm.assert_index_equal(idx.levels[1], eastern)

    # from datetime combos
    # GH 7888
    candidates = [date.today(), datetime.today(), Timestamp.today()]
    for first, second in itertools.product(candidates, repeat=2):
        index = MultiIndex.from_product([[first], [second]])
        assert isinstance(index.levels[0], pd.DatetimeIndex)
        assert isinstance(index.levels[1], pd.DatetimeIndex)


def test_constructor_with_tz():
    """``from_arrays`` preserves names and tz-aware levels."""
    rows = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    cols = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    # The same expectations apply whether the arrays are handed over as
    # indexes or wrapped in Series first.
    for arrays in ([rows, cols], [Series(rows), Series(cols)]):
        result = MultiIndex.from_arrays(arrays)

        assert result.names == ["dt1", "dt2"]
        tm.assert_index_equal(result.levels[0], rows)
        tm.assert_index_equal(result.levels[1], cols)
26 changes: 26 additions & 0 deletions pandas/tests/indexes/multi/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,29 @@ def test_duplicated2():
tm.assert_numpy_array_equal(
mi.duplicated(), np.zeros(len(mi), dtype="bool")
)


def test_duplicated_drop_duplicates():
    """Check ``duplicated`` and ``drop_duplicates`` for each ``keep`` mode.

    GH#4060
    """
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    # (keyword arguments, expected duplicate mask, arrays left after dropping)
    # The first case passes no ``keep`` at all, exercising the default.
    cases = [
        (
            {},
            [False, False, False, True, False, False],
            ([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": "last"},
            [True, False, False, False, False, False],
            ([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": False},
            [True, False, False, True, False, False],
            ([2, 3, 2, 3], [1, 1, 2, 2]),
        ),
    ]

    for kwargs, flags, remaining in cases:
        mask = idx.duplicated(**kwargs)
        tm.assert_numpy_array_equal(mask, np.array(flags, dtype=bool))
        assert mask.dtype == bool

        deduped = idx.drop_duplicates(**kwargs)
        tm.assert_index_equal(deduped, MultiIndex.from_arrays(remaining))
14 changes: 13 additions & 1 deletion pandas/tests/indexes/multi/test_format.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import warnings

import numpy as np
import pytest

import pandas as pd
from pandas import MultiIndex
from pandas import Index, MultiIndex
import pandas._testing as tm


Expand Down Expand Up @@ -76,6 +77,17 @@ def test_repr_max_seq_item_setting(idx):


class TestRepr:
def test_unicode_repr_issues(self):
levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
index = MultiIndex(levels=levels, codes=codes)

repr(index.levels)

# FIXME: dont leave commented-out
# NumPy bug
# repr(index.get_level_values(1))

def test_repr(self, idx):
result = idx[:1].__repr__()
expected = """\
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/indexes/multi/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,3 +498,41 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id
idx = MultiIndex.from_arrays(index_arr)
result = idx.slice_indexer(start=start_idx, end=end_idx)
assert result == expected


def test_pyint_engine():
    # GH#18519 : when combinations of codes cannot be represented in 64
    # bits, the index underlying the MultiIndex engine works with Python
    # integers, rather than uint64.
    reps = 5
    patterns = [
        [0] * 10 * reps,
        [1] * 10 * reps,
        [2] * 10 * reps,
        [np.nan] * reps + [2] * 9 * reps,
        [0] * reps + [2] * 9 * reps,
        [np.nan] * reps + [2] * 8 * reps + [0] * reps,
    ]
    keys = [tuple(pattern) for pattern in patterns]
    # Each level contains 4 elements (including NaN), so it is represented
    # in 2 bits, for a total of 2 * reps * 10 = 100 > 64 bits. If we were
    # using a 64 bit engine and truncating the first levels, the fourth and
    # fifth keys would collide; if truncating the last levels, the fifth and
    # sixth; if rotating bits rather than shifting, the third and fifth.

    for position, key in enumerate(keys):
        # A fresh index is built on every pass, as in the original test.
        index = MultiIndex.from_tuples(keys)
        assert index.get_loc(key) == position

        expected = np.arange(position + 1, dtype=np.intp)
        result = index.get_indexer(keys[: position + 1])
        tm.assert_numpy_array_equal(result, expected)

    # With missing key (``index`` is the one built in the final iteration):
    absent = tuple([0, 1] * 5 * reps)
    expected = np.array([-1, *range(len(keys))], dtype=np.intp)
    result = index.get_indexer([absent, *keys])
    tm.assert_numpy_array_equal(result, expected)
46 changes: 46 additions & 0 deletions pandas/tests/indexes/multi/test_lexsort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pandas import MultiIndex


class TestIsLexsorted:
def test_is_lexsorted(self):
levels = [[0, 1], [0, 1, 2]]

index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
)
assert index.is_lexsorted()

index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
)
assert not index.is_lexsorted()

index = MultiIndex(
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
)
assert not index.is_lexsorted()
assert index.lexsort_depth == 0


class TestLexsortDepth:
def test_lexsort_depth(self):
# Test that lexsort_depth return the correct sortorder
# when it was given to the MultiIndex const.
# GH#28518

levels = [[0, 1], [0, 1, 2]]

index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
)
assert index.lexsort_depth == 2

index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
)
assert index.lexsort_depth == 1

index = MultiIndex(
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
)
assert index.lexsort_depth == 0
10 changes: 10 additions & 0 deletions pandas/tests/indexes/multi/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,13 @@ def test_nan_stays_float():
assert pd.isna(df0.index.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()


def test_tuples_have_na():
    """A -1 code must surface as a missing value in the resulting tuples."""
    first_codes = [1, 1, 1, 1, -1, 0, 0, 0]
    second_codes = [0, 1, 2, 3, 0, 1, 2, 3]
    index = MultiIndex(
        levels=[[1, 0], [0, 1, 2, 3]], codes=[first_codes, second_codes]
    )

    # Position 4 is the entry whose first-level code is -1; check it through
    # both item access and the materialised ``values`` array.
    via_getitem = index[4]
    assert pd.isna(via_getitem[0])
    via_values = index.values[4]
    assert pd.isna(via_values[0])
50 changes: 50 additions & 0 deletions pandas/tests/indexes/multi/test_reshape.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from datetime import datetime

import numpy as np
import pytest
import pytz

import pandas as pd
from pandas import Index, MultiIndex
Expand Down Expand Up @@ -95,6 +98,53 @@ def test_append(idx):
assert result.equals(idx)


def test_append_index():
    """Check ``append`` between flat and multi-level indexes of mixed depth."""
    floats = [1.1, 1.2, 1.3]
    flat = Index(floats)
    dates = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
    letters = Index(["A", "B", "C"])

    two_level = MultiIndex.from_arrays([flat, dates])
    three_level = MultiIndex.from_arrays([flat, dates, letters])

    # see gh-7112
    tokyo = pytz.timezone("Asia/Tokyo")
    pairs = [
        (value, tokyo.localize(datetime(2011, 1, day)))
        for day, value in enumerate(floats, start=1)
    ]

    # Flat + multi and multi + flat: the expected result is a plain Index
    # holding the floats next to the (float, datetime) tuples.
    result = flat.append(two_level)
    tm.assert_index_equal(result, Index(floats + pairs))

    result = two_level.append(flat)
    tm.assert_index_equal(result, Index(pairs + floats))

    result = two_level.append(two_level)
    doubled = MultiIndex.from_arrays([flat.append(flat), dates.append(dates)])
    tm.assert_index_equal(result, doubled)

    # Appending the three-level index is compared against the very same
    # expectation as appending the two-level one.
    result = two_level.append(three_level)
    tm.assert_index_equal(result, doubled)

    # Three-level first: expected is an object Index of 3-tuples then 2-tuples.
    triples = [pair + (letter,) for pair, letter in zip(pairs, "ABC")]
    result = three_level.append(two_level)
    expected = Index._simple_new(np.array(triples + pairs, dtype=object), None)
    tm.assert_index_equal(result, expected)


def test_repeat():
reps = 2
numbers = [1, 2, 3]
Expand Down
Loading