Skip to content

REF: CategoricalIndex indexing tests #31559

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
361 changes: 7 additions & 354 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,76 +146,6 @@ def test_contains_list(self):
with pytest.raises(TypeError, match="unhashable type"):
["a", "b"] in idx

def test_map(self):
ci = pd.CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
result = ci.map(lambda x: x.lower())
exp = pd.CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
tm.assert_index_equal(result, exp)

ci = pd.CategoricalIndex(
list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
)
result = ci.map(lambda x: x.lower())
exp = pd.CategoricalIndex(
list("ababc"), categories=list("bac"), ordered=False, name="XXX"
)
tm.assert_index_equal(result, exp)

# GH 12766: Return an index not an array
tm.assert_index_equal(
ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
)

# change categories dtype
ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)

def f(x):
return {"A": 10, "B": 20, "C": 30}.get(x)

result = ci.map(f)
exp = pd.CategoricalIndex(
[10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False
)
tm.assert_index_equal(result, exp)

result = ci.map(pd.Series([10, 20, 30], index=["A", "B", "C"]))
tm.assert_index_equal(result, exp)

result = ci.map({"A": 10, "B": 20, "C": 30})
tm.assert_index_equal(result, exp)

def test_map_with_categorical_series(self):
# GH 12756
a = pd.Index([1, 2, 3, 4])
b = pd.Series(["even", "odd", "even", "odd"], dtype="category")
c = pd.Series(["even", "odd", "even", "odd"])

exp = CategoricalIndex(["odd", "even", "odd", np.nan])
tm.assert_index_equal(a.map(b), exp)
exp = pd.Index(["odd", "even", "odd", np.nan])
tm.assert_index_equal(a.map(c), exp)

@pytest.mark.parametrize(
    ("data", "f"),
    [
        ([1, 1, np.nan], pd.isna),
        ([1, 2, np.nan], pd.isna),
        ([1, 1, np.nan], {1: False}),
        ([1, 2, np.nan], {1: False, 2: False}),
        ([1, 1, np.nan], pd.Series([False, False])),
        ([1, 2, np.nan], pd.Series([False, False, False])),
    ],
)
def test_map_with_nan(self, data, f):  # GH 24241
    """Map a ``Categorical`` that contains NaN and check the result container.

    The expected container depends on the input: ``[1, 1, nan]`` is checked
    against a ``Categorical``, ``[1, 2, nan]`` against a plain ``Index``.
    """
    mapped = pd.Categorical(data).map(f)
    outcome = [False, False, np.nan]

    if data[1] != 1:
        # Two distinct non-NaN inputs: compare against an Index.
        tm.assert_index_equal(mapped, pd.Index(outcome))
        return

    # Repeated input value: compare against a Categorical.
    tm.assert_categorical_equal(mapped, pd.Categorical(outcome))

@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
def test_where(self, klass):
i = self.create_index()
Expand Down Expand Up @@ -384,89 +314,6 @@ def test_astype_category(self, name, dtype_ordered, index_ordered):
expected = index
tm.assert_index_equal(result, expected)

def test_reindex_base(self):
    """``get_indexer`` on itself yields 0..n-1; a bad ``method`` raises."""
    # Determined by cat ordering.
    index = CategoricalIndex(list("cab"), categories=list("cab"))

    positions = index.get_indexer(index)
    tm.assert_numpy_array_equal(np.arange(len(index), dtype=np.intp), positions)

    with pytest.raises(ValueError, match="Invalid fill method"):
        index.get_indexer(index, method="invalid")

def test_reindexing(self):
    """Compare ``CategoricalIndex.get_indexer`` with a plain ``Index``.

    The reference result is the first element returned by
    ``get_indexer_non_unique`` on an ``Index`` built from the same values.
    """
    np.random.seed(123456789)

    ci = self.create_index()
    plain = Index(np.array(ci))

    def check(target):
        expected = plain.get_indexer_non_unique(target)[0]
        tm.assert_numpy_array_equal(expected, ci.get_indexer(target))

    # Random lookups of increasing size, up to the length of the index.
    for size in (1, 2, 5, len(ci)):
        check(plain[np.random.randint(0, len(ci), size=size)])

    # see gh-17323
    #
    # Even when indexer is equal to the
    # members in the index, we should
    # respect duplicates instead of taking
    # the fast-track path.
    for target in (list("aabbca"), list("aababca")):
        check(target)

def test_reindex_dtype(self):
    """Check the result type of ``reindex`` for list and Categorical targets.

    Each case reindexes ``["a", "b", "c", "a"]`` to ``["a", "c"]``; the
    duplicated "a" appears twice in the result, giving indexer [0, 3, 2].
    """
    values = ["a", "b", "c", "a"]
    wide_categories = ["a", "b", "c", "d"]
    expected_indexer = np.array([0, 3, 2], dtype=np.intp)

    # (index to reindex, target, expected result)
    cases = [
        (
            CategoricalIndex(values),
            ["a", "c"],
            Index(["a", "a", "c"]),
        ),
        (
            CategoricalIndex(values),
            Categorical(["a", "c"]),
            CategoricalIndex(["a", "a", "c"], categories=["a", "c"]),
        ),
        (
            CategoricalIndex(values, categories=wide_categories),
            ["a", "c"],
            Index(["a", "a", "c"], dtype="object"),
        ),
        (
            CategoricalIndex(values, categories=wide_categories),
            Categorical(["a", "c"]),
            CategoricalIndex(["a", "a", "c"], categories=["a", "c"]),
        ),
    ]

    for index, target, expected in cases:
        res, indexer = index.reindex(target)
        tm.assert_index_equal(res, expected, exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)

def test_reindex_duplicate_target(self):
# See GH25459
cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
res, indexer = cat.reindex(["a", "c", "c"])
exp = Index(["a", "c", "c"], dtype="object")
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

res, indexer = cat.reindex(
CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
)
exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

def test_reindex_empty_index(self):
# See GH16770
c = CategoricalIndex([])
res, indexer = c.reindex(["a", "b"])
tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))

@pytest.mark.parametrize(
"data, non_lexsorted_data",
[[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
Expand Down Expand Up @@ -518,75 +365,6 @@ def test_drop_duplicates(self):
tm.assert_index_equal(idx.drop_duplicates(), expected)
tm.assert_index_equal(idx.unique(), expected)

def test_get_indexer(self):
    """Check ``CategoricalIndex.get_indexer`` for several target types.

    The same labels ("a", "b", "f") are passed as a ``CategoricalIndex``,
    a list and a plain ``Index``. "a" occurs twice in ``idx1`` and "f" is
    absent, so the expected indexer is ``[0, 1, 2, -1]``.

    Also checks that the ``pad``, ``backfill`` and ``nearest`` fill methods
    raise ``NotImplementedError`` with the documented messages.
    """
    idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
    idx2 = CategoricalIndex(list("abf"))

    expected = np.array([0, 1, 2, -1], dtype=np.intp)
    for indexer in [idx2, list("abf"), Index(list("abf"))]:
        # Pass the loop variable: the original always passed ``idx2``, so
        # the list and Index targets were never actually exercised.
        r1 = idx1.get_indexer(indexer)
        tm.assert_almost_equal(r1, expected)

    msg = (
        "method='pad' and method='backfill' not implemented yet for "
        "CategoricalIndex"
    )
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="pad")
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="backfill")

    msg = "method='nearest' not implemented yet for CategoricalIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="nearest")

def test_get_loc(self):
    """``CategoricalIndex.get_loc`` must agree with a plain ``Index``.

    Covers three layouts: unique labels (scalar result), non-unique
    scattered labels (boolean mask) and non-unique contiguous labels
    (slice), plus the ``KeyError`` raised for absent labels.
    """
    # GH 12531

    def assert_key_error(indexes, key, pattern):
        for index in indexes:
            with pytest.raises(KeyError, match=pattern):
                index.get_loc(key)

    # unique
    cat_unique = CategoricalIndex(list("abcde"), categories=list("edabc"))
    plain_unique = Index(list("abcde"))
    for label in ("a", "e"):
        assert cat_unique.get_loc(label) == plain_unique.get_loc(label)

    assert_key_error([cat_unique, plain_unique], "NOT-EXIST", "'NOT-EXIST'")

    # non-unique
    cat_dupes = CategoricalIndex(list("aacded"), categories=list("edabc"))
    plain_dupes = Index(list("aacded"))

    # results in bool array
    mask = cat_dupes.get_loc("d")
    tm.assert_numpy_array_equal(mask, plain_dupes.get_loc("d"))
    tm.assert_numpy_array_equal(
        mask, np.array([False, False, False, True, False, True])
    )
    # unique element results in scalar
    position = cat_dupes.get_loc("e")
    assert position == plain_dupes.get_loc("e")
    assert position == 4

    assert_key_error([cat_dupes, plain_dupes], "NOT-EXIST", "'NOT-EXIST'")

    # non-unique, sliceable
    cat_runs = CategoricalIndex(list("aabbb"), categories=list("abc"))
    plain_runs = Index(list("aabbb"))

    # results in slice
    for label, expected in (("a", slice(0, 2, None)), ("b", slice(2, 5, None))):
        found = cat_runs.get_loc(label)
        assert found == plain_runs.get_loc(label)
        assert found == expected

    # "c" is a declared category of the categorical index but is not
    # present among the values of either index.
    assert_key_error([cat_runs, plain_runs], "c", "'c'")

def test_repr_roundtrip(self):

ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
Expand Down Expand Up @@ -837,122 +615,6 @@ def test_fillna_categorical(self):
with pytest.raises(ValueError, match=msg):
idx.fillna(2.0)

def test_take_fill_value(self):
    """Check ``CategoricalIndex.take`` with and without ``fill_value``.

    Runs the same three scenarios (plain take, ``fill_value=True`` and
    ``allow_fill=False``) on a numeric and on an ordered object index, then
    checks the errors raised for out-of-range indices.
    """
    # GH 12631

    def assert_same(result, expected):
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

    positions = np.array([1, 0, -1])

    # numeric category
    idx = pd.CategoricalIndex([1, 2, 3], name="xxx")
    wrapped = pd.CategoricalIndex([2, 1, 3], name="xxx")
    assert_same(idx.take(positions), wrapped)

    # fill_value
    assert_same(
        idx.take(positions, fill_value=True),
        pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx"),
    )

    # allow_fill=False
    assert_same(idx.take(positions, allow_fill=False, fill_value=True), wrapped)

    # object category
    idx = pd.CategoricalIndex(
        list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
    )
    wrapped = pd.CategoricalIndex(
        list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
    )
    assert_same(idx.take(positions), wrapped)

    # fill_value
    assert_same(
        idx.take(positions, fill_value=True),
        pd.CategoricalIndex(
            ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
        ),
    )

    # allow_fill=False
    assert_same(idx.take(positions, allow_fill=False, fill_value=True), wrapped)

    msg = (
        "When allow_fill=True and fill_value is not None, "
        "all indices must be >= -1"
    )
    for out_of_range in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, out_of_range]), fill_value=True)

    with pytest.raises(IndexError):
        idx.take(np.array([1, -5]))

def test_take_fill_value_datetime(self):
    """Check ``take`` on a ``CategoricalIndex`` built from datetimes."""
    dates = ["2011-01-01", "2011-02-01", "2011-03-01"]
    positions = np.array([1, 0, -1])

    # datetime category
    idx = pd.CategoricalIndex(pd.DatetimeIndex(dates, name="xxx"))
    wrapped = pd.CategoricalIndex(
        pd.DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
    )
    tm.assert_index_equal(idx.take(positions), wrapped)

    # fill_value
    filled = pd.CategoricalIndex(
        pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx"),
        categories=pd.DatetimeIndex(dates),
    )
    tm.assert_index_equal(idx.take(positions, fill_value=True), filled)

    # allow_fill=False
    result = idx.take(positions, allow_fill=False, fill_value=True)
    tm.assert_index_equal(result, wrapped)

    msg = (
        "When allow_fill=True and fill_value is not None, "
        "all indices must be >= -1"
    )
    for out_of_range in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, out_of_range]), fill_value=True)

    with pytest.raises(IndexError):
        idx.take(np.array([1, -5]))

def test_take_invalid_kwargs(self):
    """``take`` must reject unknown keywords and the ``out``/``mode`` options."""
    idx = pd.CategoricalIndex([1, 2, 3], name="foo")
    indices = [1, 0, -1]

    # (expected exception, message pattern, offending keyword arguments)
    rejected = [
        (
            TypeError,
            r"take\(\) got an unexpected keyword argument 'foo'",
            {"foo": 2},
        ),
        (ValueError, "the 'out' parameter is not supported", {"out": indices}),
        (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
    ]

    for error, msg, kwargs in rejected:
        with pytest.raises(error, match=msg):
            idx.take(indices, **kwargs)

@pytest.mark.parametrize(
"dtype, engine_type",
[
Expand All @@ -976,19 +638,10 @@ def test_engine_type(self, dtype, engine_type):
assert np.issubdtype(ci.codes.dtype, dtype)
assert isinstance(ci._engine, engine_type)

@pytest.mark.parametrize(
    "data, categories",
    [
        (list("abcbca"), list("cab")),
        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
    ],
    ids=["string", "interval"],
)
def test_map_str(self, data, categories, ordered_fixture):
    """``map(str)`` is expected to keep the categorical dtype and orderedness."""
    # GH 31202 - override base class since we want to maintain categorical/ordered
    source = CategoricalIndex(data, categories=categories, ordered=ordered_fixture)
    mapped = source.map(str)

    stringified = CategoricalIndex(
        map(str, data), categories=map(str, categories), ordered=ordered_fixture
    )
    tm.assert_index_equal(mapped, stringified)
def test_reindex_base(self):
# See test_reindex.py
pass

def test_map_str(self):
# See test_map.py
pass
Loading