Skip to content

Commit 66f0741

Browse files
committed
TST: Split pandas/tests/frame/test_indexing into a directory (#29544)
1 parent 505b6e7 commit 66f0741

File tree

4 files changed: +1045 −1004 lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,385 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas.core.dtypes.dtypes import CategoricalDtype
5+
6+
import pandas as pd
7+
from pandas import (
8+
Categorical,
9+
DataFrame,
10+
Index,
11+
Series,
12+
)
13+
import pandas.util.testing as tm
14+
15+
16+
class TestDataFrameIndexingCategorical:
17+
def test_assignment(self):
18+
# assignment
19+
df = DataFrame(
20+
{"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")}
21+
)
22+
labels = Categorical(
23+
["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
24+
)
25+
26+
df = df.sort_values(by=["value"], ascending=True)
27+
s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
28+
d = s.values
29+
df["D"] = d
30+
str(df)
31+
32+
result = df.dtypes
33+
expected = Series(
34+
[np.dtype("int32"), CategoricalDtype(categories=labels, ordered=False)],
35+
index=["value", "D"],
36+
)
37+
tm.assert_series_equal(result, expected)
38+
39+
df["E"] = s
40+
str(df)
41+
42+
result = df.dtypes
43+
expected = Series(
44+
[
45+
np.dtype("int32"),
46+
CategoricalDtype(categories=labels, ordered=False),
47+
CategoricalDtype(categories=labels, ordered=False),
48+
],
49+
index=["value", "D", "E"],
50+
)
51+
tm.assert_series_equal(result, expected)
52+
53+
result1 = df["D"]
54+
result2 = df["E"]
55+
tm.assert_categorical_equal(result1._data._block.values, d)
56+
57+
# sorting
58+
s.name = "E"
59+
tm.assert_series_equal(result2.sort_index(), s.sort_index())
60+
61+
cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
62+
df = DataFrame(Series(cat))
63+
64+
def test_assigning_ops(self):
65+
# systematically test the assigning operations:
66+
# for all slicing ops:
67+
# for value in categories and value not in categories:
68+
69+
# - assign a single value -> exp_single_cats_value
70+
71+
# - assign a complete row (mixed values) -> exp_single_row
72+
73+
# assign multiple rows (mixed values) (-> array) -> exp_multi_row
74+
75+
# assign a part of a column with dtype == categorical ->
76+
# exp_parts_cats_col
77+
78+
# assign a part of a column with dtype != categorical ->
79+
# exp_parts_cats_col
80+
81+
cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"])
82+
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
83+
values = [1, 1, 1, 1, 1, 1, 1]
84+
orig = DataFrame({"cats": cats, "values": values}, index=idx)
85+
86+
# the expected values
87+
# changed single row
88+
cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
89+
idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
90+
values1 = [1, 1, 2, 1, 1, 1, 1]
91+
exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1)
92+
93+
# changed multiple rows
94+
cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
95+
idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
96+
values2 = [1, 1, 2, 2, 1, 1, 1]
97+
exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2)
98+
99+
# changed part of the cats column
100+
cats3 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
101+
idx3 = Index(["h", "i", "j", "k", "l", "m", "n"])
102+
values3 = [1, 1, 1, 1, 1, 1, 1]
103+
exp_parts_cats_col = DataFrame({"cats": cats3, "values": values3}, index=idx3)
104+
105+
# changed single value in cats col
106+
cats4 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
107+
idx4 = Index(["h", "i", "j", "k", "l", "m", "n"])
108+
values4 = [1, 1, 1, 1, 1, 1, 1]
109+
exp_single_cats_value = DataFrame(
110+
{"cats": cats4, "values": values4}, index=idx4
111+
)
112+
113+
# iloc
114+
# ###############
115+
# - assign a single value -> exp_single_cats_value
116+
df = orig.copy()
117+
df.iloc[2, 0] = "b"
118+
tm.assert_frame_equal(df, exp_single_cats_value)
119+
120+
df = orig.copy()
121+
df.iloc[df.index == "j", 0] = "b"
122+
tm.assert_frame_equal(df, exp_single_cats_value)
123+
124+
# - assign a single value not in the current categories set
125+
with pytest.raises(ValueError):
126+
df = orig.copy()
127+
df.iloc[2, 0] = "c"
128+
129+
# - assign a complete row (mixed values) -> exp_single_row
130+
df = orig.copy()
131+
df.iloc[2, :] = ["b", 2]
132+
tm.assert_frame_equal(df, exp_single_row)
133+
134+
# - assign a complete row (mixed values) not in categories set
135+
with pytest.raises(ValueError):
136+
df = orig.copy()
137+
df.iloc[2, :] = ["c", 2]
138+
139+
# - assign multiple rows (mixed values) -> exp_multi_row
140+
df = orig.copy()
141+
df.iloc[2:4, :] = [["b", 2], ["b", 2]]
142+
tm.assert_frame_equal(df, exp_multi_row)
143+
144+
with pytest.raises(ValueError):
145+
df = orig.copy()
146+
df.iloc[2:4, :] = [["c", 2], ["c", 2]]
147+
148+
# assign a part of a column with dtype == categorical ->
149+
# exp_parts_cats_col
150+
df = orig.copy()
151+
df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"])
152+
tm.assert_frame_equal(df, exp_parts_cats_col)
153+
154+
with pytest.raises(ValueError):
155+
# different categories -> not sure if this should fail or pass
156+
df = orig.copy()
157+
df.iloc[2:4, 0] = Categorical(list("bb"), categories=list("abc"))
158+
159+
with pytest.raises(ValueError):
160+
# different values
161+
df = orig.copy()
162+
df.iloc[2:4, 0] = Categorical(list("cc"), categories=list("abc"))
163+
164+
# assign a part of a column with dtype != categorical ->
165+
# exp_parts_cats_col
166+
df = orig.copy()
167+
df.iloc[2:4, 0] = ["b", "b"]
168+
tm.assert_frame_equal(df, exp_parts_cats_col)
169+
170+
with pytest.raises(ValueError):
171+
df.iloc[2:4, 0] = ["c", "c"]
172+
173+
# loc
174+
# ##############
175+
# - assign a single value -> exp_single_cats_value
176+
df = orig.copy()
177+
df.loc["j", "cats"] = "b"
178+
tm.assert_frame_equal(df, exp_single_cats_value)
179+
180+
df = orig.copy()
181+
df.loc[df.index == "j", "cats"] = "b"
182+
tm.assert_frame_equal(df, exp_single_cats_value)
183+
184+
# - assign a single value not in the current categories set
185+
with pytest.raises(ValueError):
186+
df = orig.copy()
187+
df.loc["j", "cats"] = "c"
188+
189+
# - assign a complete row (mixed values) -> exp_single_row
190+
df = orig.copy()
191+
df.loc["j", :] = ["b", 2]
192+
tm.assert_frame_equal(df, exp_single_row)
193+
194+
# - assign a complete row (mixed values) not in categories set
195+
with pytest.raises(ValueError):
196+
df = orig.copy()
197+
df.loc["j", :] = ["c", 2]
198+
199+
# - assign multiple rows (mixed values) -> exp_multi_row
200+
df = orig.copy()
201+
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
202+
tm.assert_frame_equal(df, exp_multi_row)
203+
204+
with pytest.raises(ValueError):
205+
df = orig.copy()
206+
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
207+
208+
# assign a part of a column with dtype == categorical ->
209+
# exp_parts_cats_col
210+
df = orig.copy()
211+
df.loc["j":"k", "cats"] = Categorical(["b", "b"], categories=["a", "b"])
212+
tm.assert_frame_equal(df, exp_parts_cats_col)
213+
214+
with pytest.raises(ValueError):
215+
# different categories -> not sure if this should fail or pass
216+
df = orig.copy()
217+
df.loc["j":"k", "cats"] = Categorical(
218+
["b", "b"], categories=["a", "b", "c"]
219+
)
220+
221+
with pytest.raises(ValueError):
222+
# different values
223+
df = orig.copy()
224+
df.loc["j":"k", "cats"] = Categorical(
225+
["c", "c"], categories=["a", "b", "c"]
226+
)
227+
228+
# assign a part of a column with dtype != categorical ->
229+
# exp_parts_cats_col
230+
df = orig.copy()
231+
df.loc["j":"k", "cats"] = ["b", "b"]
232+
tm.assert_frame_equal(df, exp_parts_cats_col)
233+
234+
with pytest.raises(ValueError):
235+
df.loc["j":"k", "cats"] = ["c", "c"]
236+
237+
# loc
238+
# ##############
239+
# - assign a single value -> exp_single_cats_value
240+
df = orig.copy()
241+
df.loc["j", df.columns[0]] = "b"
242+
tm.assert_frame_equal(df, exp_single_cats_value)
243+
244+
df = orig.copy()
245+
df.loc[df.index == "j", df.columns[0]] = "b"
246+
tm.assert_frame_equal(df, exp_single_cats_value)
247+
248+
# - assign a single value not in the current categories set
249+
with pytest.raises(ValueError):
250+
df = orig.copy()
251+
df.loc["j", df.columns[0]] = "c"
252+
253+
# - assign a complete row (mixed values) -> exp_single_row
254+
df = orig.copy()
255+
df.loc["j", :] = ["b", 2]
256+
tm.assert_frame_equal(df, exp_single_row)
257+
258+
# - assign a complete row (mixed values) not in categories set
259+
with pytest.raises(ValueError):
260+
df = orig.copy()
261+
df.loc["j", :] = ["c", 2]
262+
263+
# - assign multiple rows (mixed values) -> exp_multi_row
264+
df = orig.copy()
265+
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
266+
tm.assert_frame_equal(df, exp_multi_row)
267+
268+
with pytest.raises(ValueError):
269+
df = orig.copy()
270+
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
271+
272+
# assign a part of a column with dtype == categorical ->
273+
# exp_parts_cats_col
274+
df = orig.copy()
275+
df.loc["j":"k", df.columns[0]] = Categorical(["b", "b"], categories=["a", "b"])
276+
tm.assert_frame_equal(df, exp_parts_cats_col)
277+
278+
with pytest.raises(ValueError):
279+
# different categories -> not sure if this should fail or pass
280+
df = orig.copy()
281+
df.loc["j":"k", df.columns[0]] = Categorical(
282+
["b", "b"], categories=["a", "b", "c"]
283+
)
284+
285+
with pytest.raises(ValueError):
286+
# different values
287+
df = orig.copy()
288+
df.loc["j":"k", df.columns[0]] = Categorical(
289+
["c", "c"], categories=["a", "b", "c"]
290+
)
291+
292+
# assign a part of a column with dtype != categorical ->
293+
# exp_parts_cats_col
294+
df = orig.copy()
295+
df.loc["j":"k", df.columns[0]] = ["b", "b"]
296+
tm.assert_frame_equal(df, exp_parts_cats_col)
297+
298+
with pytest.raises(ValueError):
299+
df.loc["j":"k", df.columns[0]] = ["c", "c"]
300+
301+
# iat
302+
df = orig.copy()
303+
df.iat[2, 0] = "b"
304+
tm.assert_frame_equal(df, exp_single_cats_value)
305+
306+
# - assign a single value not in the current categories set
307+
with pytest.raises(ValueError):
308+
df = orig.copy()
309+
df.iat[2, 0] = "c"
310+
311+
# at
312+
# - assign a single value -> exp_single_cats_value
313+
df = orig.copy()
314+
df.at["j", "cats"] = "b"
315+
tm.assert_frame_equal(df, exp_single_cats_value)
316+
317+
# - assign a single value not in the current categories set
318+
with pytest.raises(ValueError):
319+
df = orig.copy()
320+
df.at["j", "cats"] = "c"
321+
322+
# fancy indexing
323+
catsf = Categorical(
324+
["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"]
325+
)
326+
idxf = Index(["h", "i", "j", "k", "l", "m", "n"])
327+
valuesf = [1, 1, 3, 3, 1, 1, 1]
328+
df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf)
329+
330+
exp_fancy = exp_multi_row.copy()
331+
exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True)
332+
333+
df[df["cats"] == "c"] = ["b", 2]
334+
# category c is kept in .categories
335+
tm.assert_frame_equal(df, exp_fancy)
336+
337+
# set_value
338+
df = orig.copy()
339+
df.at["j", "cats"] = "b"
340+
tm.assert_frame_equal(df, exp_single_cats_value)
341+
342+
with pytest.raises(ValueError):
343+
df = orig.copy()
344+
df.at["j", "cats"] = "c"
345+
346+
# Assigning a Category to parts of a int/... column uses the values of
347+
# the Categorical
348+
df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
349+
exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
350+
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
351+
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
352+
tm.assert_frame_equal(df, exp)
353+
354+
def test_functions_no_warnings(self):
355+
df = DataFrame({"value": np.random.randint(0, 100, 20)})
356+
labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)]
357+
with tm.assert_produces_warning(False):
358+
df["group"] = pd.cut(
359+
df.value, range(0, 105, 10), right=False, labels=labels
360+
)
361+
362+
def test_loc_indexing_preserves_index_category_dtype(self):
363+
# GH 15166
364+
df = DataFrame(
365+
data=np.arange(2, 22, 2),
366+
index=pd.MultiIndex(
367+
levels=[pd.CategoricalIndex(["a", "b"]), range(10)],
368+
codes=[[0] * 5 + [1] * 5, range(10)],
369+
names=["Index1", "Index2"],
370+
),
371+
)
372+
373+
expected = pd.CategoricalIndex(
374+
["a", "b"],
375+
categories=["a", "b"],
376+
ordered=False,
377+
name="Index1",
378+
dtype="category",
379+
)
380+
381+
result = df.index.levels[0]
382+
tm.assert_index_equal(result, expected)
383+
384+
result = df.loc[["a"]].index.levels[0]
385+
tm.assert_index_equal(result, expected)

0 commit comments

Comments
 (0)