Skip to content

Commit b8bdd73

Browse files
prakhar987, proost
authored and committed
TST: Split pandas/tests/frame/test_indexing into a directory (pandas-dev#29544) (pandas-dev#29694)
1 parent 94f0517 commit b8bdd73

File tree

4 files changed

+1033
-1012
lines changed

4 files changed

+1033
-1012
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,388 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas.core.dtypes.dtypes import CategoricalDtype
5+
6+
import pandas as pd
7+
from pandas import Categorical, DataFrame, Index, Series
8+
import pandas.util.testing as tm
9+
10+
11+
class TestDataFrameIndexingCategorical:
12+
def test_assignment(self):
13+
# assignment
14+
df = DataFrame(
15+
{"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")}
16+
)
17+
labels = Categorical(
18+
["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
19+
)
20+
21+
df = df.sort_values(by=["value"], ascending=True)
22+
s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
23+
d = s.values
24+
df["D"] = d
25+
str(df)
26+
27+
result = df.dtypes
28+
expected = Series(
29+
[np.dtype("int32"), CategoricalDtype(categories=labels, ordered=False)],
30+
index=["value", "D"],
31+
)
32+
tm.assert_series_equal(result, expected)
33+
34+
df["E"] = s
35+
str(df)
36+
37+
result = df.dtypes
38+
expected = Series(
39+
[
40+
np.dtype("int32"),
41+
CategoricalDtype(categories=labels, ordered=False),
42+
CategoricalDtype(categories=labels, ordered=False),
43+
],
44+
index=["value", "D", "E"],
45+
)
46+
tm.assert_series_equal(result, expected)
47+
48+
result1 = df["D"]
49+
result2 = df["E"]
50+
tm.assert_categorical_equal(result1._data._block.values, d)
51+
52+
# sorting
53+
s.name = "E"
54+
tm.assert_series_equal(result2.sort_index(), s.sort_index())
55+
56+
cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
57+
df = DataFrame(Series(cat))
58+
59+
def test_assigning_ops(self):
60+
# systematically test the assigning operations:
61+
# for all slicing ops:
62+
# for value in categories and value not in categories:
63+
64+
# - assign a single value -> exp_single_cats_value
65+
66+
# - assign a complete row (mixed values) -> exp_single_row
67+
68+
# assign multiple rows (mixed values) (-> array) -> exp_multi_row
69+
70+
# assign a part of a column with dtype == categorical ->
71+
# exp_parts_cats_col
72+
73+
# assign a part of a column with dtype != categorical ->
74+
# exp_parts_cats_col
75+
76+
cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"])
77+
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
78+
values = [1, 1, 1, 1, 1, 1, 1]
79+
orig = DataFrame({"cats": cats, "values": values}, index=idx)
80+
81+
# the expected values
82+
# changed single row
83+
cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
84+
idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
85+
values1 = [1, 1, 2, 1, 1, 1, 1]
86+
exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1)
87+
88+
# changed multiple rows
89+
cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
90+
idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
91+
values2 = [1, 1, 2, 2, 1, 1, 1]
92+
exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2)
93+
94+
# changed part of the cats column
95+
cats3 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
96+
idx3 = Index(["h", "i", "j", "k", "l", "m", "n"])
97+
values3 = [1, 1, 1, 1, 1, 1, 1]
98+
exp_parts_cats_col = DataFrame({"cats": cats3, "values": values3}, index=idx3)
99+
100+
# changed single value in cats col
101+
cats4 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
102+
idx4 = Index(["h", "i", "j", "k", "l", "m", "n"])
103+
values4 = [1, 1, 1, 1, 1, 1, 1]
104+
exp_single_cats_value = DataFrame(
105+
{"cats": cats4, "values": values4}, index=idx4
106+
)
107+
108+
# iloc
109+
# ###############
110+
# - assign a single value -> exp_single_cats_value
111+
df = orig.copy()
112+
df.iloc[2, 0] = "b"
113+
tm.assert_frame_equal(df, exp_single_cats_value)
114+
115+
df = orig.copy()
116+
df.iloc[df.index == "j", 0] = "b"
117+
tm.assert_frame_equal(df, exp_single_cats_value)
118+
119+
# - assign a single value not in the current categories set
120+
with pytest.raises(ValueError):
121+
df = orig.copy()
122+
df.iloc[2, 0] = "c"
123+
124+
# - assign a complete row (mixed values) -> exp_single_row
125+
df = orig.copy()
126+
df.iloc[2, :] = ["b", 2]
127+
tm.assert_frame_equal(df, exp_single_row)
128+
129+
# - assign a complete row (mixed values) not in categories set
130+
with pytest.raises(ValueError):
131+
df = orig.copy()
132+
df.iloc[2, :] = ["c", 2]
133+
134+
# - assign multiple rows (mixed values) -> exp_multi_row
135+
df = orig.copy()
136+
df.iloc[2:4, :] = [["b", 2], ["b", 2]]
137+
tm.assert_frame_equal(df, exp_multi_row)
138+
139+
with pytest.raises(ValueError):
140+
df = orig.copy()
141+
df.iloc[2:4, :] = [["c", 2], ["c", 2]]
142+
143+
# assign a part of a column with dtype == categorical ->
144+
# exp_parts_cats_col
145+
df = orig.copy()
146+
df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"])
147+
tm.assert_frame_equal(df, exp_parts_cats_col)
148+
149+
with pytest.raises(ValueError):
150+
# different categories -> not sure if this should fail or pass
151+
df = orig.copy()
152+
df.iloc[2:4, 0] = Categorical(list("bb"), categories=list("abc"))
153+
154+
with pytest.raises(ValueError):
155+
# different values
156+
df = orig.copy()
157+
df.iloc[2:4, 0] = Categorical(list("cc"), categories=list("abc"))
158+
159+
# assign a part of a column with dtype != categorical ->
160+
# exp_parts_cats_col
161+
df = orig.copy()
162+
df.iloc[2:4, 0] = ["b", "b"]
163+
tm.assert_frame_equal(df, exp_parts_cats_col)
164+
165+
with pytest.raises(ValueError):
166+
df.iloc[2:4, 0] = ["c", "c"]
167+
168+
# loc
169+
# ##############
170+
# - assign a single value -> exp_single_cats_value
171+
df = orig.copy()
172+
df.loc["j", "cats"] = "b"
173+
tm.assert_frame_equal(df, exp_single_cats_value)
174+
175+
df = orig.copy()
176+
df.loc[df.index == "j", "cats"] = "b"
177+
tm.assert_frame_equal(df, exp_single_cats_value)
178+
179+
# - assign a single value not in the current categories set
180+
with pytest.raises(ValueError):
181+
df = orig.copy()
182+
df.loc["j", "cats"] = "c"
183+
184+
# - assign a complete row (mixed values) -> exp_single_row
185+
df = orig.copy()
186+
df.loc["j", :] = ["b", 2]
187+
tm.assert_frame_equal(df, exp_single_row)
188+
189+
# - assign a complete row (mixed values) not in categories set
190+
with pytest.raises(ValueError):
191+
df = orig.copy()
192+
df.loc["j", :] = ["c", 2]
193+
194+
# - assign multiple rows (mixed values) -> exp_multi_row
195+
df = orig.copy()
196+
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
197+
tm.assert_frame_equal(df, exp_multi_row)
198+
199+
with pytest.raises(ValueError):
200+
df = orig.copy()
201+
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
202+
203+
# assign a part of a column with dtype == categorical ->
204+
# exp_parts_cats_col
205+
df = orig.copy()
206+
df.loc["j":"k", "cats"] = Categorical(["b", "b"], categories=["a", "b"])
207+
tm.assert_frame_equal(df, exp_parts_cats_col)
208+
209+
with pytest.raises(ValueError):
210+
# different categories -> not sure if this should fail or pass
211+
df = orig.copy()
212+
df.loc["j":"k", "cats"] = Categorical(
213+
["b", "b"], categories=["a", "b", "c"]
214+
)
215+
216+
with pytest.raises(ValueError):
217+
# different values
218+
df = orig.copy()
219+
df.loc["j":"k", "cats"] = Categorical(
220+
["c", "c"], categories=["a", "b", "c"]
221+
)
222+
223+
# assign a part of a column with dtype != categorical ->
224+
# exp_parts_cats_col
225+
df = orig.copy()
226+
df.loc["j":"k", "cats"] = ["b", "b"]
227+
tm.assert_frame_equal(df, exp_parts_cats_col)
228+
229+
with pytest.raises(ValueError):
230+
df.loc["j":"k", "cats"] = ["c", "c"]
231+
232+
# loc
233+
# ##############
234+
# - assign a single value -> exp_single_cats_value
235+
df = orig.copy()
236+
df.loc["j", df.columns[0]] = "b"
237+
tm.assert_frame_equal(df, exp_single_cats_value)
238+
239+
df = orig.copy()
240+
df.loc[df.index == "j", df.columns[0]] = "b"
241+
tm.assert_frame_equal(df, exp_single_cats_value)
242+
243+
# - assign a single value not in the current categories set
244+
with pytest.raises(ValueError):
245+
df = orig.copy()
246+
df.loc["j", df.columns[0]] = "c"
247+
248+
# - assign a complete row (mixed values) -> exp_single_row
249+
df = orig.copy()
250+
df.loc["j", :] = ["b", 2]
251+
tm.assert_frame_equal(df, exp_single_row)
252+
253+
# - assign a complete row (mixed values) not in categories set
254+
with pytest.raises(ValueError):
255+
df = orig.copy()
256+
df.loc["j", :] = ["c", 2]
257+
258+
# - assign multiple rows (mixed values) -> exp_multi_row
259+
df = orig.copy()
260+
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
261+
tm.assert_frame_equal(df, exp_multi_row)
262+
263+
with pytest.raises(ValueError):
264+
df = orig.copy()
265+
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
266+
267+
# assign a part of a column with dtype == categorical ->
268+
# exp_parts_cats_col
269+
df = orig.copy()
270+
df.loc["j":"k", df.columns[0]] = Categorical(["b", "b"], categories=["a", "b"])
271+
tm.assert_frame_equal(df, exp_parts_cats_col)
272+
273+
with pytest.raises(ValueError):
274+
# different categories -> not sure if this should fail or pass
275+
df = orig.copy()
276+
df.loc["j":"k", df.columns[0]] = Categorical(
277+
["b", "b"], categories=["a", "b", "c"]
278+
)
279+
280+
with pytest.raises(ValueError):
281+
# different values
282+
df = orig.copy()
283+
df.loc["j":"k", df.columns[0]] = Categorical(
284+
["c", "c"], categories=["a", "b", "c"]
285+
)
286+
287+
# assign a part of a column with dtype != categorical ->
288+
# exp_parts_cats_col
289+
df = orig.copy()
290+
df.loc["j":"k", df.columns[0]] = ["b", "b"]
291+
tm.assert_frame_equal(df, exp_parts_cats_col)
292+
293+
with pytest.raises(ValueError):
294+
df.loc["j":"k", df.columns[0]] = ["c", "c"]
295+
296+
# iat
297+
df = orig.copy()
298+
df.iat[2, 0] = "b"
299+
tm.assert_frame_equal(df, exp_single_cats_value)
300+
301+
# - assign a single value not in the current categories set
302+
with pytest.raises(ValueError):
303+
df = orig.copy()
304+
df.iat[2, 0] = "c"
305+
306+
# at
307+
# - assign a single value -> exp_single_cats_value
308+
df = orig.copy()
309+
df.at["j", "cats"] = "b"
310+
tm.assert_frame_equal(df, exp_single_cats_value)
311+
312+
# - assign a single value not in the current categories set
313+
with pytest.raises(ValueError):
314+
df = orig.copy()
315+
df.at["j", "cats"] = "c"
316+
317+
# fancy indexing
318+
catsf = Categorical(
319+
["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"]
320+
)
321+
idxf = Index(["h", "i", "j", "k", "l", "m", "n"])
322+
valuesf = [1, 1, 3, 3, 1, 1, 1]
323+
df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf)
324+
325+
exp_fancy = exp_multi_row.copy()
326+
exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True)
327+
328+
df[df["cats"] == "c"] = ["b", 2]
329+
# category c is kept in .categories
330+
tm.assert_frame_equal(df, exp_fancy)
331+
332+
# set_value
333+
df = orig.copy()
334+
df.at["j", "cats"] = "b"
335+
tm.assert_frame_equal(df, exp_single_cats_value)
336+
337+
with pytest.raises(ValueError):
338+
df = orig.copy()
339+
df.at["j", "cats"] = "c"
340+
341+
# Assigning a Category to parts of a int/... column uses the values of
342+
# the Categorical
343+
df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
344+
exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
345+
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
346+
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
347+
tm.assert_frame_equal(df, exp)
348+
349+
def test_functions_no_warnings(self):
350+
df = DataFrame({"value": np.random.randint(0, 100, 20)})
351+
labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)]
352+
with tm.assert_produces_warning(False):
353+
df["group"] = pd.cut(
354+
df.value, range(0, 105, 10), right=False, labels=labels
355+
)
356+
357+
def test_loc_indexing_preserves_index_category_dtype(self):
358+
# GH 15166
359+
df = DataFrame(
360+
data=np.arange(2, 22, 2),
361+
index=pd.MultiIndex(
362+
levels=[pd.CategoricalIndex(["a", "b"]), range(10)],
363+
codes=[[0] * 5 + [1] * 5, range(10)],
364+
names=["Index1", "Index2"],
365+
),
366+
)
367+
368+
expected = pd.CategoricalIndex(
369+
["a", "b"],
370+
categories=["a", "b"],
371+
ordered=False,
372+
name="Index1",
373+
dtype="category",
374+
)
375+
376+
result = df.index.levels[0]
377+
tm.assert_index_equal(result, expected)
378+
379+
result = df.loc[["a"]].index.levels[0]
380+
tm.assert_index_equal(result, expected)
381+
382+
def test_wrong_length_cat_dtype_raises(self):
383+
# GH29523
384+
cat = pd.Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"])
385+
df = pd.DataFrame({"bar": range(10)})
386+
err = "Length of values does not match length of index"
387+
with pytest.raises(ValueError, match=err):
388+
df["foo"] = cat

0 commit comments

Comments
 (0)