Skip to content

TST(string dtype): Resolve some HDF5 xfails #60615

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
3 commits were merged (branch names not captured in this extract) on
Dec 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
kind = "integer"
elif dtype_str == "object":
kind = "object"
elif dtype_str == "str":
kind = "str"
else:
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")

Expand Down
45 changes: 34 additions & 11 deletions pandas/tests/io/pytables/test_file_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,11 @@

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]


@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
def test_mode(setup_path, tmp_path, mode):
def test_mode(setup_path, tmp_path, mode, using_infer_string):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
Expand Down Expand Up @@ -91,10 +90,12 @@ def test_mode(setup_path, tmp_path, mode):
read_hdf(path, "df", mode=mode)
else:
result = read_hdf(path, "df", mode=mode)
if using_infer_string:
df.columns = df.columns.astype("str")
tm.assert_frame_equal(result, df)


def test_default_mode(tmp_path, setup_path):
def test_default_mode(tmp_path, setup_path, using_infer_string):
# read_hdf uses default mode
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
Expand All @@ -104,7 +105,10 @@ def test_default_mode(tmp_path, setup_path):
path = tmp_path / setup_path
df.to_hdf(path, key="df", mode="w")
result = read_hdf(path, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)


def test_reopen_handle(tmp_path, setup_path):
Expand Down Expand Up @@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
assert not store.is_open


def test_open_args(setup_path):
def test_open_args(setup_path, using_infer_string):
with tm.ensure_clean(setup_path) as path:
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -178,8 +182,13 @@ def test_open_args(setup_path):
store["df"] = df
store.append("df2", df)

tm.assert_frame_equal(store["df"], df)
tm.assert_frame_equal(store["df2"], df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")

tm.assert_frame_equal(store["df"], expected)
tm.assert_frame_equal(store["df2"], expected)

store.close()

Expand All @@ -194,7 +203,7 @@ def test_flush(setup_path):
store.flush(fsync=True)


def test_complibs_default_settings(tmp_path, setup_path):
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
# GH15943
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -207,7 +216,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complevel=9)
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand All @@ -218,7 +231,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complib="zlib")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand All @@ -229,7 +246,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand Down Expand Up @@ -308,6 +329,7 @@ def test_complibs(tmp_path, lvl, lib, request):
assert node.filters.complib == lib


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.skipif(
not is_platform_little_endian(), reason="reason platform is not little endian"
)
Expand All @@ -325,6 +347,7 @@ def test_encoding(setup_path):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"val",
[
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/pytables/test_subclass.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Series,
Expand All @@ -19,7 +17,6 @@

class TestHDFStoreSubclass:
# GH 33748
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_supported_for_subclass_dataframe(self, tmp_path):
data = {"a": [1, 2], "b": [3, 4]}
sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
WASM,
is_platform_windows,
Expand Down Expand Up @@ -365,7 +363,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
expected = f_path.read()
assert result == expected

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
def test_write_fspath_hdf5(self):
# Same test as write_fspath_all, except HDF5 files aren't
# necessarily byte-for-byte identical for a given dataframe, so we'll
Expand Down
Loading