-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Test pytables refactor #28746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Test pytables refactor #28746
Changes from 27 commits
b7450c7
480184d
31d6836
e3242f4
1985c96
e93f3df
80d6aaf
7f0842b
882ba8e
54483fb
cd6a2c9
eb14b23
7b640fc
44c56f1
595670e
515825a
6106a24
8f540a7
cfdd437
6375977
aa97d78
9931352
27d62e4
2e83dfd
eeac6ce
78c710b
a38bc5e
6d97676
64ec41d
49e9d31
e5cecce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from contextlib import contextmanager | ||
import os | ||
import tempfile | ||
|
||
from pandas.io.pytables import HDFStore | ||
|
||
|
||
def safe_remove(path): | ||
if path is not None: | ||
try: | ||
os.remove(path) | ||
except OSError: | ||
pass | ||
|
||
|
||
def safe_close(store): | ||
try: | ||
if store is not None: | ||
store.close() | ||
except IOError: | ||
pass | ||
|
||
|
||
def create_tempfile(path): | ||
""" create an unopened named temporary file """ | ||
return os.path.join(tempfile.gettempdir(), path) | ||
|
||
|
||
# contextmanager to ensure the file cleanup | ||
@contextmanager | ||
def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32=False): | ||
|
||
try: | ||
|
||
# put in the temporary path if we don't have one already | ||
if not len(os.path.dirname(path)): | ||
path = create_tempfile(path) | ||
|
||
store = HDFStore( | ||
path, mode=mode, complevel=complevel, complib=complib, fletcher32=False | ||
) | ||
yield store | ||
finally: | ||
safe_close(store) | ||
if mode == "w" or mode == "a": | ||
safe_remove(path) | ||
|
||
|
||
@contextmanager | ||
def ensure_clean_path(path): | ||
""" | ||
return essentially a named temporary file that is not opened | ||
and deleted on exiting; if path is a list, then create and | ||
return list of filenames | ||
""" | ||
try: | ||
if isinstance(path, list): | ||
filenames = [create_tempfile(p) for p in path] | ||
yield filenames | ||
else: | ||
filenames = [create_tempfile(path)] | ||
yield filenames[0] | ||
finally: | ||
for f in filenames: | ||
safe_remove(f) | ||
|
||
|
||
def _maybe_remove(store, key): | ||
"""For tests using tables, try removing the table to be sure there is | ||
no content from previous tests using the same table name.""" | ||
try: | ||
store.remove(key) | ||
except (ValueError, KeyError): | ||
pass |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import pytest | ||
|
||
import pandas.util.testing as tm | ||
|
||
|
||
@pytest.fixture | ||
def setup_path(): | ||
"""Fixture for setup path""" | ||
return "tmp.__{}__.h5".format(tm.rands(10)) | ||
|
||
|
||
@pytest.fixture(scope="module", autouse=True) | ||
def setup_mode(): | ||
""" Reset testing mode fixture""" | ||
tm.reset_testing_mode() | ||
yield | ||
tm.set_testing_mode() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
from warnings import catch_warnings | ||
|
||
import numpy as np | ||
import pytest | ||
|
||
import pandas.util._test_decorators as td | ||
|
||
import pandas as pd | ||
from pandas import DataFrame, Series | ||
from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store | ||
import pandas.util.testing as tm | ||
from pandas.util.testing import assert_frame_equal | ||
|
||
from pandas.io.pytables import read_hdf | ||
|
||
tables = pytest.importorskip("tables") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right can you move these to common.py (the tables.parameters.*) that are repeated across files. you can also move the pytest.importorskip to common.py as well There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But this would mean having to import pytest in your common module, is this something you'd be ok with? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback would this be acceptable? diff --git a/pandas/tests/io/pytables/conftest.py b/pandas/tests/io/pytables/conftest.py
index 6164f5d07..db00b1e33 100644
--- a/pandas/tests/io/pytables/conftest.py
+++ b/pandas/tests/io/pytables/conftest.py
@@ -15,3 +15,14 @@ def setup_mode():
tm.reset_testing_mode()
yield
tm.set_testing_mode()
+
+
+@pytest.fixture(scope="module", autouse=True)
+def set_parameters():
+ """
+ Set pytables parameters so we don't have file sharing.
+ """
+ tables = pytest.importorskip("tables")
+ tables.parameters.MAX_NUMEXPR_THREADS = 1
+ tables.parameters.MAX_BLOSC_THREADS = 1
+ tables.parameters.MAX_THREADS = 1
diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py
index 96c4a3042..37739b1fd 100644
--- a/pandas/tests/io/pytables/test_complex.py
+++ b/pandas/tests/io/pytables/test_complex.py
@@ -14,10 +14,6 @@ from pandas.util.testing import assert_frame_equal
from pandas.io.pytables import read_hdf
tables = pytest.importorskip("tables")
-# set these parameters so we don't have file sharing
-tables.parameters.MAX_NUMEXPR_THREADS = 1
-tables.parameters.MAX_BLOSC_THREADS = 1
-tables.parameters.MAX_THREADS = 1
# GH10447
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 97fd89903..e31cb0cf6 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -56,10 +56,6 @@ from pandas.io.pytables import TableIterator # noqa: E402 isort:skip
tables = pytest.importorskip("tables")
-# set these parameters so we don't have file sharing
-tables.parameters.MAX_NUMEXPR_THREADS = 1
-tables.parameters.MAX_BLOSC_THREADS = 1
-tables.parameters.MAX_THREADS = 1
_default_compressor = "blosc"
diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py
index 3e91c6381..b18fdc7b6 100644
--- a/pandas/tests/io/pytables/test_timezones.py
+++ b/pandas/tests/io/pytables/test_timezones.py
@@ -16,10 +16,6 @@ import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, set_timezone
tables = pytest.importorskip("tables")
-# set these parameters so we don't have file sharing
-tables.parameters.MAX_NUMEXPR_THREADS = 1
-tables.parameters.MAX_BLOSC_THREADS = 1
-tables.parameters.MAX_THREADS = 1
def _compare_with_tz(a, b):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the other alternative is to monkeypatch.. @pytest.fixture(autouse=True)
def set_parameters(monkeypatch):
"""
Set pytables parameters so we don't have file sharing.
"""
monkeypatch.setattr("tables.parameters.MAX_NUMEXPR_THREADS", 1, raising=False)
monkeypatch.setattr("tables.parameters.MAX_BLOSC_THREADS", 1, raising=False)
monkeypatch.setattr("tables.parameters.MAX_THREADS", 1, raising=False) but this needs to be a function scoped fixture, see pytest-dev/pytest#363 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in theory it would be better as a fixture but just trying to avoid code duplication this was always somewhat leaky because it globally patches it could be nicer but can do later There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. but IMO common.py should not have pytest imported. also, some of the functions in common could be fixturized (and maybe eventually remove common.py) i'm not convinced from the pytest docs that pytest.importorskip should be used outside the module, but it seems to work. but we need to merge ASAP to avoid merge conflicts, so i'll not block this further. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so what is the verdict on tables for this PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
see #28746 (comment) |
||
# set these parameters so we don't have file sharing | ||
tables.parameters.MAX_NUMEXPR_THREADS = 1 | ||
tables.parameters.MAX_BLOSC_THREADS = 1 | ||
tables.parameters.MAX_THREADS = 1 | ||
|
||
|
||
# GH10447 | ||
|
||
|
||
def test_complex_fixed(setup_path): | ||
df = DataFrame( | ||
np.random.rand(4, 5).astype(np.complex64), | ||
index=list("abcd"), | ||
columns=list("ABCDE"), | ||
) | ||
|
||
with ensure_clean_path(setup_path) as path: | ||
df.to_hdf(path, "df") | ||
reread = read_hdf(path, "df") | ||
assert_frame_equal(df, reread) | ||
|
||
df = DataFrame( | ||
np.random.rand(4, 5).astype(np.complex128), | ||
index=list("abcd"), | ||
columns=list("ABCDE"), | ||
) | ||
with ensure_clean_path(setup_path) as path: | ||
df.to_hdf(path, "df") | ||
reread = read_hdf(path, "df") | ||
assert_frame_equal(df, reread) | ||
|
||
|
||
def test_complex_table(setup_path): | ||
df = DataFrame( | ||
np.random.rand(4, 5).astype(np.complex64), | ||
index=list("abcd"), | ||
columns=list("ABCDE"), | ||
) | ||
|
||
with ensure_clean_path(setup_path) as path: | ||
df.to_hdf(path, "df", format="table") | ||
reread = read_hdf(path, "df") | ||
assert_frame_equal(df, reread) | ||
|
||
df = DataFrame( | ||
np.random.rand(4, 5).astype(np.complex128), | ||
index=list("abcd"), | ||
columns=list("ABCDE"), | ||
) | ||
|
||
with ensure_clean_path(setup_path) as path: | ||
df.to_hdf(path, "df", format="table", mode="w") | ||
reread = read_hdf(path, "df") | ||
assert_frame_equal(df, reread) | ||
|
||
|
||
@td.xfail_non_writeable | ||
def test_complex_mixed_fixed(setup_path): | ||
complex64 = np.array( | ||
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 | ||
) | ||
complex128 = np.array( | ||
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 | ||
) | ||
df = DataFrame( | ||
{ | ||
"A": [1, 2, 3, 4], | ||
"B": ["a", "b", "c", "d"], | ||
"C": complex64, | ||
"D": complex128, | ||
"E": [1.0, 2.0, 3.0, 4.0], | ||
}, | ||
index=list("abcd"), | ||
) | ||
with ensure_clean_path(setup_path) as path: | ||
df.to_hdf(path, "df") | ||
reread = read_hdf(path, "df") | ||
assert_frame_equal(df, reread) | ||
|
||
|
||
def test_complex_mixed_table(setup_path): | ||
complex64 = np.array( | ||
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 | ||
) | ||
complex128 = np.array( | ||
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 | ||
) | ||
df = DataFrame( | ||
{ | ||
"A": [1, 2, 3, 4], | ||
"B": ["a", "b", "c", "d"], | ||
"C": complex64, | ||
"D": complex128, | ||
"E": [1.0, 2.0, 3.0, 4.0], | ||
}, | ||
index=list("abcd"), | ||
) | ||
|
||
with ensure_clean_store(setup_path) as store: | ||
store.append("df", df, data_columns=["A", "B"]) | ||
result = store.select("df", where="A>2") | ||
assert_frame_equal(df.loc[df.A > 2], result) | ||
|
||
with ensure_clean_path(setup_path) as path: | ||
df.to_hdf(path, "df", format="table") | ||
reread = read_hdf(path, "df") | ||
assert_frame_equal(df, reread) | ||
|
||
|
||
def test_complex_across_dimensions_fixed(setup_path): | ||
with catch_warnings(record=True): | ||
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) | ||
s = Series(complex128, index=list("abcd")) | ||
df = DataFrame({"A": s, "B": s}) | ||
|
||
objs = [s, df] | ||
comps = [tm.assert_series_equal, tm.assert_frame_equal] | ||
for obj, comp in zip(objs, comps): | ||
with ensure_clean_path(setup_path) as path: | ||
obj.to_hdf(path, "obj", format="fixed") | ||
reread = read_hdf(path, "obj") | ||
comp(obj, reread) | ||
|
||
|
||
def test_complex_across_dimensions(setup_path): | ||
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) | ||
s = Series(complex128, index=list("abcd")) | ||
df = DataFrame({"A": s, "B": s}) | ||
|
||
with catch_warnings(record=True): | ||
|
||
objs = [df] | ||
comps = [tm.assert_frame_equal] | ||
for obj, comp in zip(objs, comps): | ||
with ensure_clean_path(setup_path) as path: | ||
obj.to_hdf(path, "obj", format="table") | ||
reread = read_hdf(path, "obj") | ||
comp(obj, reread) | ||
|
||
|
||
def test_complex_indexing_error(setup_path): | ||
complex128 = np.array( | ||
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 | ||
) | ||
df = DataFrame( | ||
{"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128}, | ||
index=list("abcd"), | ||
) | ||
with ensure_clean_store(setup_path) as store: | ||
with pytest.raises(TypeError): | ||
store.append("df", df, data_columns=["C"]) | ||
|
||
|
||
def test_complex_series_error(setup_path): | ||
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) | ||
s = Series(complex128, index=list("abcd")) | ||
|
||
with ensure_clean_path(setup_path) as path: | ||
with pytest.raises(TypeError): | ||
s.to_hdf(path, "obj", format="t") | ||
|
||
with ensure_clean_path(setup_path) as path: | ||
s.to_hdf(path, "obj", format="t", index=False) | ||
reread = read_hdf(path, "obj") | ||
tm.assert_series_equal(s, reread) | ||
|
||
|
||
def test_complex_append(setup_path): | ||
df = DataFrame( | ||
{"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)} | ||
) | ||
|
||
with ensure_clean_store(setup_path) as store: | ||
store.append("df", df, data_columns=["b"]) | ||
store.append("df", df) | ||
result = store.select("df") | ||
assert_frame_equal(pd.concat([df, df], 0), result) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in a future PR, we want to have doc-strings & typing for these
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
type errors in tests directories are not reported by mypy
pandas/setup.cfg
Lines 133 to 134 in 75c9783
so should typing be added? does help grok functions but equally if not correct (since not checked) could be confusing.