Skip to content

TST: Split pandas/tests/frame/test_indexing into a directory (#29544) #29694

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
388 changes: 388 additions & 0 deletions pandas/tests/frame/indexing/test_categorical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,388 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import Categorical, DataFrame, Index, Series
import pandas.util.testing as tm


class TestDataFrameIndexingCategorical:
def test_assignment(self):
# assignment
df = DataFrame(
{"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")}
)
labels = Categorical(
["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
)

df = df.sort_values(by=["value"], ascending=True)
s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
d = s.values
df["D"] = d
str(df)

result = df.dtypes
expected = Series(
[np.dtype("int32"), CategoricalDtype(categories=labels, ordered=False)],
index=["value", "D"],
)
tm.assert_series_equal(result, expected)

df["E"] = s
str(df)

result = df.dtypes
expected = Series(
[
np.dtype("int32"),
CategoricalDtype(categories=labels, ordered=False),
CategoricalDtype(categories=labels, ordered=False),
],
index=["value", "D", "E"],
)
tm.assert_series_equal(result, expected)

result1 = df["D"]
result2 = df["E"]
tm.assert_categorical_equal(result1._data._block.values, d)

# sorting
s.name = "E"
tm.assert_series_equal(result2.sort_index(), s.sort_index())

cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
df = DataFrame(Series(cat))

def test_assigning_ops(self):
# systematically test the assigning operations:
# for all slicing ops:
# for value in categories and value not in categories:

# - assign a single value -> exp_single_cats_value

# - assign a complete row (mixed values) -> exp_single_row

# assign multiple rows (mixed values) (-> array) -> exp_multi_row

# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col

# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col

cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"])
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
values = [1, 1, 1, 1, 1, 1, 1]
orig = DataFrame({"cats": cats, "values": values}, index=idx)

# the expected values
# changed single row
cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
values1 = [1, 1, 2, 1, 1, 1, 1]
exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1)

# changed multiple rows
cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
values2 = [1, 1, 2, 2, 1, 1, 1]
exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2)

# changed part of the cats column
cats3 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
idx3 = Index(["h", "i", "j", "k", "l", "m", "n"])
values3 = [1, 1, 1, 1, 1, 1, 1]
exp_parts_cats_col = DataFrame({"cats": cats3, "values": values3}, index=idx3)

# changed single value in cats col
cats4 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
idx4 = Index(["h", "i", "j", "k", "l", "m", "n"])
values4 = [1, 1, 1, 1, 1, 1, 1]
exp_single_cats_value = DataFrame(
{"cats": cats4, "values": values4}, index=idx4
)

# iloc
# ###############
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.iloc[2, 0] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

df = orig.copy()
df.iloc[df.index == "j", 0] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

# - assign a single value not in the current categories set
with pytest.raises(ValueError):
df = orig.copy()
df.iloc[2, 0] = "c"

# - assign a complete row (mixed values) -> exp_single_row
df = orig.copy()
df.iloc[2, :] = ["b", 2]
tm.assert_frame_equal(df, exp_single_row)

# - assign a complete row (mixed values) not in categories set
with pytest.raises(ValueError):
df = orig.copy()
df.iloc[2, :] = ["c", 2]

# - assign multiple rows (mixed values) -> exp_multi_row
df = orig.copy()
df.iloc[2:4, :] = [["b", 2], ["b", 2]]
tm.assert_frame_equal(df, exp_multi_row)

with pytest.raises(ValueError):
df = orig.copy()
df.iloc[2:4, :] = [["c", 2], ["c", 2]]

# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col
df = orig.copy()
df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp_parts_cats_col)

with pytest.raises(ValueError):
# different categories -> not sure if this should fail or pass
df = orig.copy()
df.iloc[2:4, 0] = Categorical(list("bb"), categories=list("abc"))

with pytest.raises(ValueError):
# different values
df = orig.copy()
df.iloc[2:4, 0] = Categorical(list("cc"), categories=list("abc"))

# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col
df = orig.copy()
df.iloc[2:4, 0] = ["b", "b"]
tm.assert_frame_equal(df, exp_parts_cats_col)

with pytest.raises(ValueError):
df.iloc[2:4, 0] = ["c", "c"]

# loc
# ##############
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.loc["j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

df = orig.copy()
df.loc[df.index == "j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

# - assign a single value not in the current categories set
with pytest.raises(ValueError):
df = orig.copy()
df.loc["j", "cats"] = "c"

# - assign a complete row (mixed values) -> exp_single_row
df = orig.copy()
df.loc["j", :] = ["b", 2]
tm.assert_frame_equal(df, exp_single_row)

# - assign a complete row (mixed values) not in categories set
with pytest.raises(ValueError):
df = orig.copy()
df.loc["j", :] = ["c", 2]

# - assign multiple rows (mixed values) -> exp_multi_row
df = orig.copy()
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
tm.assert_frame_equal(df, exp_multi_row)

with pytest.raises(ValueError):
df = orig.copy()
df.loc["j":"k", :] = [["c", 2], ["c", 2]]

# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", "cats"] = Categorical(["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp_parts_cats_col)

with pytest.raises(ValueError):
# different categories -> not sure if this should fail or pass
df = orig.copy()
df.loc["j":"k", "cats"] = Categorical(
["b", "b"], categories=["a", "b", "c"]
)

with pytest.raises(ValueError):
# different values
df = orig.copy()
df.loc["j":"k", "cats"] = Categorical(
["c", "c"], categories=["a", "b", "c"]
)

# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", "cats"] = ["b", "b"]
tm.assert_frame_equal(df, exp_parts_cats_col)

with pytest.raises(ValueError):
df.loc["j":"k", "cats"] = ["c", "c"]

# loc
# ##############
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.loc["j", df.columns[0]] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

df = orig.copy()
df.loc[df.index == "j", df.columns[0]] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

# - assign a single value not in the current categories set
with pytest.raises(ValueError):
df = orig.copy()
df.loc["j", df.columns[0]] = "c"

# - assign a complete row (mixed values) -> exp_single_row
df = orig.copy()
df.loc["j", :] = ["b", 2]
tm.assert_frame_equal(df, exp_single_row)

# - assign a complete row (mixed values) not in categories set
with pytest.raises(ValueError):
df = orig.copy()
df.loc["j", :] = ["c", 2]

# - assign multiple rows (mixed values) -> exp_multi_row
df = orig.copy()
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
tm.assert_frame_equal(df, exp_multi_row)

with pytest.raises(ValueError):
df = orig.copy()
df.loc["j":"k", :] = [["c", 2], ["c", 2]]

# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", df.columns[0]] = Categorical(["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp_parts_cats_col)

with pytest.raises(ValueError):
# different categories -> not sure if this should fail or pass
df = orig.copy()
df.loc["j":"k", df.columns[0]] = Categorical(
["b", "b"], categories=["a", "b", "c"]
)

with pytest.raises(ValueError):
# different values
df = orig.copy()
df.loc["j":"k", df.columns[0]] = Categorical(
["c", "c"], categories=["a", "b", "c"]
)

# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", df.columns[0]] = ["b", "b"]
tm.assert_frame_equal(df, exp_parts_cats_col)

with pytest.raises(ValueError):
df.loc["j":"k", df.columns[0]] = ["c", "c"]

# iat
df = orig.copy()
df.iat[2, 0] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

# - assign a single value not in the current categories set
with pytest.raises(ValueError):
df = orig.copy()
df.iat[2, 0] = "c"

# at
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.at["j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

# - assign a single value not in the current categories set
with pytest.raises(ValueError):
df = orig.copy()
df.at["j", "cats"] = "c"

# fancy indexing
catsf = Categorical(
["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"]
)
idxf = Index(["h", "i", "j", "k", "l", "m", "n"])
valuesf = [1, 1, 3, 3, 1, 1, 1]
df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf)

exp_fancy = exp_multi_row.copy()
exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True)

df[df["cats"] == "c"] = ["b", 2]
# category c is kept in .categories
tm.assert_frame_equal(df, exp_fancy)

# set_value
df = orig.copy()
df.at["j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)

with pytest.raises(ValueError):
df = orig.copy()
df.at["j", "cats"] = "c"

# Assigning a Category to parts of a int/... column uses the values of
# the Categorical
df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp)

def test_functions_no_warnings(self):
df = DataFrame({"value": np.random.randint(0, 100, 20)})
labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)]
with tm.assert_produces_warning(False):
df["group"] = pd.cut(
df.value, range(0, 105, 10), right=False, labels=labels
)

def test_loc_indexing_preserves_index_category_dtype(self):
# GH 15166
df = DataFrame(
data=np.arange(2, 22, 2),
index=pd.MultiIndex(
levels=[pd.CategoricalIndex(["a", "b"]), range(10)],
codes=[[0] * 5 + [1] * 5, range(10)],
names=["Index1", "Index2"],
),
)

expected = pd.CategoricalIndex(
["a", "b"],
categories=["a", "b"],
ordered=False,
name="Index1",
dtype="category",
)

result = df.index.levels[0]
tm.assert_index_equal(result, expected)

result = df.loc[["a"]].index.levels[0]
tm.assert_index_equal(result, expected)

def test_wrong_length_cat_dtype_raises(self):
# GH29523
cat = pd.Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"])
df = pd.DataFrame({"bar": range(10)})
err = "Length of values does not match length of index"
with pytest.raises(ValueError, match=err):
df["foo"] = cat
Loading