Skip to content

Commit a6bc6ec

Browse files
authored
TST GH26807 Break up pandas/tests/io/pytables/test_store.py (#39072)
1 parent 4a3fa69 commit a6bc6ec

12 files changed

+5048
-4775
lines changed

pandas/tests/io/pytables/test_append.py

+927
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import Categorical, DataFrame, Series, _testing as tm, concat, read_hdf
5+
from pandas.tests.io.pytables.common import (
6+
_maybe_remove,
7+
ensure_clean_path,
8+
ensure_clean_store,
9+
)
10+
11+
pytestmark = pytest.mark.single
12+
13+
14+
def test_categorical(setup_path):
    """Round-trip categorical Series and DataFrames through an HDFStore.

    Exercises, in order: unordered and ordered categorical Series, a frame
    holding a categorical column, integer-valued (and NaN-containing)
    categoricals, a frame with two categorical columns plus its stored
    metadata, ``where`` queries on a categorical data column, appending with
    equal / different categories, and recursive removal of the metadata.
    """
    values = ["a", "b", "b", "a", "a", "c"]
    levels = ["a", "b", "c", "d"]

    def make_series(ordered):
        # Category "d" is declared but never used by any value.
        return Series(Categorical(values, categories=levels, ordered=ordered))

    with ensure_clean_store(setup_path) as store:

        def roundtrip(key, obj, **append_kwargs):
            # Drop any stale node, append ``obj`` under ``key``, read it back.
            _maybe_remove(store, key)
            store.append(key, obj, **append_kwargs)
            return store.select(key)

        # Basic: unordered, then ordered, categorical Series.
        unordered = make_series(ordered=False)
        tm.assert_series_equal(unordered, roundtrip("s", unordered, format="table"))

        ordered = make_series(ordered=True)
        tm.assert_series_equal(
            ordered, roundtrip("s_ordered", ordered, format="table")
        )

        # A frame carrying the ordered categorical as one of its columns.
        df = DataFrame({"s": ordered, "vals": [1, 2, 3, 4, 5, 6]})
        tm.assert_frame_equal(roundtrip("df", df, format="table"), df)

        # Dtypes: categoricals built from numeric data, without and with NaN.
        numeric_cases = (
            ("si", [1, 1, 2, 2, 3, 4, 5]),
            ("si2", [1, 1, np.nan, 2, 3, 4, 5]),
        )
        for key, raw in numeric_cases:
            numeric_cat = Series(raw).astype("category")
            tm.assert_series_equal(roundtrip(key, numeric_cat), numeric_cat)

        # Multiple categorical columns in a single frame.
        df2 = df.copy()
        df2["s2"] = Series(list("abcdefg")).astype("category")
        tm.assert_frame_equal(roundtrip("df2", df2), df2)

        # The store listing must mention the node and its category metadata.
        # NOTE(review): the check for '/df2/meta/values_block_0/meta' was
        # already disabled in this test; the reason is not visible here.
        info = store.info()
        assert "/df2 " in info
        assert "/df2/meta/values_block_1/meta" in info

        # Unordered categorical once more, under a fresh key.
        unordered_again = make_series(ordered=False)
        tm.assert_series_equal(
            roundtrip("s2", unordered_again, format="table"), unordered_again
        )

        # Query: select on the categorical data column "s".
        _maybe_remove(store, "df3")
        store.append("df3", df, data_columns=["s"])
        query_cases = (
            ('s in ["b","c"]', ["b", "c"]),
            ('s = ["b","c"]', ["b", "c"]),
            ('s in ["d"]', ["d"]),
            ('s in ["f"]', ["f"]),
        )
        for where_clause, wanted in query_cases:
            expected = df[df.s.isin(wanted)]
            result = store.select("df3", where=[where_clause])
            tm.assert_frame_equal(result, expected)

        # Appending a frame whose categories match the stored ones is fine.
        store.append("df3", df)

        df = concat([df, df])
        expected = df[df.s.isin(["b", "c"])]
        result = store.select("df3", where=['s in ["b","c"]'])
        tm.assert_frame_equal(result, expected)

        # Appending with a different set of categories must be rejected.
        df3 = df.copy()
        df3["s"] = df3["s"].cat.remove_unused_categories()

        msg = "cannot append a categorical with different categories to the existing"
        with pytest.raises(ValueError, match=msg):
            store.append("df3", df3)

        # The category metadata exists before removal and, because removal
        # is recursive, must be gone afterwards.
        assert store.select("df3/meta/s/meta") is not None
        store.remove("df3")

        with pytest.raises(
            KeyError, match="'No object named df3/meta/s/meta in the file'"
        ):
            store.select("df3/meta/s/meta")
134+
135+
136+
def test_categorical_conversion(setup_path):
    """Check that a non-matching ``where`` yields an empty frame (GH13322).

    read_hdf on a table with data columns must return no rows when the
    ``where`` criteria isn't met, both for plain string columns and after the
    same columns have been converted to categoricals.
    """

    def check_empty_selection(frame):
        # The expected result is an empty frame that keeps ``frame``'s dtypes.
        empty = frame.iloc[[], :]
        with ensure_clean_path(setup_path) as path:
            frame.to_hdf(path, "df", format="table", data_columns=True)
            selected = read_hdf(path, "df", where="obsids=B")
            tm.assert_frame_equal(selected, empty)

    df = DataFrame(
        {
            "obsids": ["ESP_012345_6789", "ESP_987654_3210"],
            "imgids": ["APF00006np", "APF0001imm"],
            "data": [4.3, 9.8],
        }
    )

    # Without categories.
    check_empty_selection(df)

    # With categories: convert both identifier columns in place.
    for column in ("obsids", "imgids"):
        df[column] = df[column].astype("category")
    check_empty_selection(df)
165+
166+
167+
def test_categorical_nan_only_columns(setup_path):
    """Check that NaN-only categorical columns can be read back (GH18413).

    Writes a frame containing categorical columns, some of which hold nothing
    but missing values, to a table-format HDF file and verifies that read_hdf
    returns an equal frame.
    """
    df = DataFrame(
        {
            "a": ["a", "b", "c", np.nan],
            "b": [np.nan] * 4,
            "c": [1, 2, 3, 4],
            "d": Series([None] * 4, dtype=object),
        }
    )

    # (target column, source column) pairs, applied in this order.
    # NOTE(review): "d" is overwritten with the categorical built from "b",
    # not from its own object values. This looks like a copy/paste slip, but
    # changing it may alter the categories dtype being round-tripped, so
    # confirm before touching it.
    conversions = (("a", "a"), ("b", "b"), ("d", "b"))
    for target, source in conversions:
        df[target] = df[source].astype("category")

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table", data_columns=True)
        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, df)

0 commit comments

Comments
 (0)