Skip to content

Commit 2edc7c9

Browse files
authored
TST(string dtype): Resolve some HDF5 xfails (#60615)
* TST(string dtype): Resolve HDF5 xfails
* More xfails
* Cleanup
1 parent 82f4354 commit 2edc7c9

File tree

4 files changed

+36
-17
lines changed

4 files changed

+36
-17
lines changed

pandas/io/pytables.py

+2
Original file line number | Diff line number | Diff line change
@@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
52975297
kind = "integer"
52985298
elif dtype_str == "object":
52995299
kind = "object"
5300+
elif dtype_str == "str":
5301+
kind = "str"
53005302
else:
53015303
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
53025304

pandas/tests/io/pytables/test_file_handling.py

+34 -11
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,11 @@
3737

3838
pytestmark = [
3939
pytest.mark.single_cpu,
40-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
4140
]
4241

4342

4443
@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
45-
def test_mode(setup_path, tmp_path, mode):
44+
def test_mode(setup_path, tmp_path, mode, using_infer_string):
4645
df = DataFrame(
4746
np.random.default_rng(2).standard_normal((10, 4)),
4847
columns=Index(list("ABCD"), dtype=object),
@@ -91,10 +90,12 @@ def test_mode(setup_path, tmp_path, mode):
9190
read_hdf(path, "df", mode=mode)
9291
else:
9392
result = read_hdf(path, "df", mode=mode)
93+
if using_infer_string:
94+
df.columns = df.columns.astype("str")
9495
tm.assert_frame_equal(result, df)
9596

9697

97-
def test_default_mode(tmp_path, setup_path):
98+
def test_default_mode(tmp_path, setup_path, using_infer_string):
9899
# read_hdf uses default mode
99100
df = DataFrame(
100101
np.random.default_rng(2).standard_normal((10, 4)),
@@ -104,7 +105,10 @@ def test_default_mode(tmp_path, setup_path):
104105
path = tmp_path / setup_path
105106
df.to_hdf(path, key="df", mode="w")
106107
result = read_hdf(path, "df")
107-
tm.assert_frame_equal(result, df)
108+
expected = df.copy()
109+
if using_infer_string:
110+
expected.columns = expected.columns.astype("str")
111+
tm.assert_frame_equal(result, expected)
108112

109113

110114
def test_reopen_handle(tmp_path, setup_path):
@@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
163167
assert not store.is_open
164168

165169

166-
def test_open_args(setup_path):
170+
def test_open_args(setup_path, using_infer_string):
167171
with tm.ensure_clean(setup_path) as path:
168172
df = DataFrame(
169173
1.1 * np.arange(120).reshape((30, 4)),
@@ -178,8 +182,13 @@ def test_open_args(setup_path):
178182
store["df"] = df
179183
store.append("df2", df)
180184

181-
tm.assert_frame_equal(store["df"], df)
182-
tm.assert_frame_equal(store["df2"], df)
185+
expected = df.copy()
186+
if using_infer_string:
187+
expected.index = expected.index.astype("str")
188+
expected.columns = expected.columns.astype("str")
189+
190+
tm.assert_frame_equal(store["df"], expected)
191+
tm.assert_frame_equal(store["df2"], expected)
183192

184193
store.close()
185194

@@ -194,7 +203,7 @@ def test_flush(setup_path):
194203
store.flush(fsync=True)
195204

196205

197-
def test_complibs_default_settings(tmp_path, setup_path):
206+
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
198207
# GH15943
199208
df = DataFrame(
200209
1.1 * np.arange(120).reshape((30, 4)),
@@ -207,7 +216,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
207216
tmpfile = tmp_path / setup_path
208217
df.to_hdf(tmpfile, key="df", complevel=9)
209218
result = read_hdf(tmpfile, "df")
210-
tm.assert_frame_equal(result, df)
219+
expected = df.copy()
220+
if using_infer_string:
221+
expected.index = expected.index.astype("str")
222+
expected.columns = expected.columns.astype("str")
223+
tm.assert_frame_equal(result, expected)
211224

212225
with tables.open_file(tmpfile, mode="r") as h5file:
213226
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -218,7 +231,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
218231
tmpfile = tmp_path / setup_path
219232
df.to_hdf(tmpfile, key="df", complib="zlib")
220233
result = read_hdf(tmpfile, "df")
221-
tm.assert_frame_equal(result, df)
234+
expected = df.copy()
235+
if using_infer_string:
236+
expected.index = expected.index.astype("str")
237+
expected.columns = expected.columns.astype("str")
238+
tm.assert_frame_equal(result, expected)
222239

223240
with tables.open_file(tmpfile, mode="r") as h5file:
224241
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -229,7 +246,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
229246
tmpfile = tmp_path / setup_path
230247
df.to_hdf(tmpfile, key="df")
231248
result = read_hdf(tmpfile, "df")
232-
tm.assert_frame_equal(result, df)
249+
expected = df.copy()
250+
if using_infer_string:
251+
expected.index = expected.index.astype("str")
252+
expected.columns = expected.columns.astype("str")
253+
tm.assert_frame_equal(result, expected)
233254

234255
with tables.open_file(tmpfile, mode="r") as h5file:
235256
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -308,6 +329,7 @@ def test_complibs(tmp_path, lvl, lib, request):
308329
assert node.filters.complib == lib
309330

310331

332+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
311333
@pytest.mark.skipif(
312334
not is_platform_little_endian(), reason="reason platform is not little endian"
313335
)
@@ -325,6 +347,7 @@ def test_encoding(setup_path):
325347
tm.assert_frame_equal(result, expected)
326348

327349

350+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
328351
@pytest.mark.parametrize(
329352
"val",
330353
[

pandas/tests/io/pytables/test_subclass.py

-3
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
DataFrame,
86
Series,
@@ -19,7 +17,6 @@
1917

2018
class TestHDFStoreSubclass:
2119
# GH 33748
22-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
2320
def test_supported_for_subclass_dataframe(self, tmp_path):
2421
data = {"a": [1, 2], "b": [3, 4]}
2522
sdf = tm.SubclassedDataFrame(data, dtype=np.intp)

pandas/tests/io/test_common.py

-3
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,6 @@
1919
import numpy as np
2020
import pytest
2121

22-
from pandas._config import using_string_dtype
23-
2422
from pandas.compat import (
2523
WASM,
2624
is_platform_windows,
@@ -365,7 +363,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
365363
expected = f_path.read()
366364
assert result == expected
367365

368-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
369366
def test_write_fspath_hdf5(self):
370367
# Same test as write_fspath_all, except HDF5 files aren't
371368
# necessarily byte-for-byte identical for a given dataframe, so we'll

0 commit comments

Comments
 (0)