Skip to content

Commit a993738

Browse files
committed
Backport PR pandas-dev#60615: TST(string dtype): Resolve some HDF5 xfails
1 parent fb075b5 commit a993738

File tree

4 files changed

+36
-17
lines changed

4 files changed

+36
-17
lines changed

pandas/io/pytables.py

+2
Original file line number | Diff line number | Diff line change
@@ -5274,6 +5274,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
52745274
kind = "integer"
52755275
elif dtype_str == "object":
52765276
kind = "object"
5277+
elif dtype_str == "str":
5278+
kind = "str"
52775279
else:
52785280
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
52795281

pandas/tests/io/pytables/test_file_handling.py

+34 -11
Original file line number | Diff line number | Diff line change
@@ -36,12 +36,11 @@
3636

3737
pytestmark = [
3838
pytest.mark.single_cpu,
39-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
4039
]
4140

4241

4342
@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
44-
def test_mode(setup_path, tmp_path, mode):
43+
def test_mode(setup_path, tmp_path, mode, using_infer_string):
4544
df = DataFrame(
4645
np.random.default_rng(2).standard_normal((10, 4)),
4746
columns=Index(list("ABCD"), dtype=object),
@@ -90,10 +89,12 @@ def test_mode(setup_path, tmp_path, mode):
9089
read_hdf(path, "df", mode=mode)
9190
else:
9291
result = read_hdf(path, "df", mode=mode)
92+
if using_infer_string:
93+
df.columns = df.columns.astype("str")
9394
tm.assert_frame_equal(result, df)
9495

9596

96-
def test_default_mode(tmp_path, setup_path):
97+
def test_default_mode(tmp_path, setup_path, using_infer_string):
9798
# read_hdf uses default mode
9899
df = DataFrame(
99100
np.random.default_rng(2).standard_normal((10, 4)),
@@ -103,7 +104,10 @@ def test_default_mode(tmp_path, setup_path):
103104
path = tmp_path / setup_path
104105
df.to_hdf(path, key="df", mode="w")
105106
result = read_hdf(path, "df")
106-
tm.assert_frame_equal(result, df)
107+
expected = df.copy()
108+
if using_infer_string:
109+
expected.columns = expected.columns.astype("str")
110+
tm.assert_frame_equal(result, expected)
107111

108112

109113
def test_reopen_handle(tmp_path, setup_path):
@@ -162,7 +166,7 @@ def test_reopen_handle(tmp_path, setup_path):
162166
assert not store.is_open
163167

164168

165-
def test_open_args(setup_path):
169+
def test_open_args(setup_path, using_infer_string):
166170
with tm.ensure_clean(setup_path) as path:
167171
df = DataFrame(
168172
1.1 * np.arange(120).reshape((30, 4)),
@@ -177,8 +181,13 @@ def test_open_args(setup_path):
177181
store["df"] = df
178182
store.append("df2", df)
179183

180-
tm.assert_frame_equal(store["df"], df)
181-
tm.assert_frame_equal(store["df2"], df)
184+
expected = df.copy()
185+
if using_infer_string:
186+
expected.index = expected.index.astype("str")
187+
expected.columns = expected.columns.astype("str")
188+
189+
tm.assert_frame_equal(store["df"], expected)
190+
tm.assert_frame_equal(store["df2"], expected)
182191

183192
store.close()
184193

@@ -193,7 +202,7 @@ def test_flush(setup_path):
193202
store.flush(fsync=True)
194203

195204

196-
def test_complibs_default_settings(tmp_path, setup_path):
205+
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
197206
# GH15943
198207
df = DataFrame(
199208
1.1 * np.arange(120).reshape((30, 4)),
@@ -206,7 +215,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
206215
tmpfile = tmp_path / setup_path
207216
df.to_hdf(tmpfile, key="df", complevel=9)
208217
result = read_hdf(tmpfile, "df")
209-
tm.assert_frame_equal(result, df)
218+
expected = df.copy()
219+
if using_infer_string:
220+
expected.index = expected.index.astype("str")
221+
expected.columns = expected.columns.astype("str")
222+
tm.assert_frame_equal(result, expected)
210223

211224
with tables.open_file(tmpfile, mode="r") as h5file:
212225
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -217,7 +230,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
217230
tmpfile = tmp_path / setup_path
218231
df.to_hdf(tmpfile, key="df", complib="zlib")
219232
result = read_hdf(tmpfile, "df")
220-
tm.assert_frame_equal(result, df)
233+
expected = df.copy()
234+
if using_infer_string:
235+
expected.index = expected.index.astype("str")
236+
expected.columns = expected.columns.astype("str")
237+
tm.assert_frame_equal(result, expected)
221238

222239
with tables.open_file(tmpfile, mode="r") as h5file:
223240
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -228,7 +245,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
228245
tmpfile = tmp_path / setup_path
229246
df.to_hdf(tmpfile, key="df")
230247
result = read_hdf(tmpfile, "df")
231-
tm.assert_frame_equal(result, df)
248+
expected = df.copy()
249+
if using_infer_string:
250+
expected.index = expected.index.astype("str")
251+
expected.columns = expected.columns.astype("str")
252+
tm.assert_frame_equal(result, expected)
232253

233254
with tables.open_file(tmpfile, mode="r") as h5file:
234255
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -302,6 +323,7 @@ def test_complibs(tmp_path, lvl, lib, request):
302323
assert node.filters.complib == lib
303324

304325

326+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
305327
@pytest.mark.skipif(
306328
not is_platform_little_endian(), reason="reason platform is not little endian"
307329
)
@@ -319,6 +341,7 @@ def test_encoding(setup_path):
319341
tm.assert_frame_equal(result, expected)
320342

321343

344+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
322345
@pytest.mark.parametrize(
323346
"val",
324347
[

pandas/tests/io/pytables/test_subclass.py

-3
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
DataFrame,
86
Series,
@@ -19,7 +17,6 @@
1917

2018
class TestHDFStoreSubclass:
2119
# GH 33748
22-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
2320
def test_supported_for_subclass_dataframe(self, tmp_path):
2421
data = {"a": [1, 2], "b": [3, 4]}
2522
sdf = tm.SubclassedDataFrame(data, dtype=np.intp)

pandas/tests/io/test_common.py

-3
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,6 @@
1818
import numpy as np
1919
import pytest
2020

21-
from pandas._config import using_string_dtype
22-
2321
from pandas.compat import is_platform_windows
2422
import pandas.util._test_decorators as td
2523

@@ -379,7 +377,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
379377
expected = f_path.read()
380378
assert result == expected
381379

382-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
383380
def test_write_fspath_hdf5(self):
384381
# Same test as write_fspath_all, except HDF5 files aren't
385382
# necessarily byte-for-byte identical for a given dataframe, so we'll

0 commit comments

Comments
 (0)