Skip to content

Test pytables refactor #28746

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
Oct 5, 2019
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
b7450c7
added a conftest.py to pytables subdir
tolaa001 Oct 2, 2019
480184d
added common.py file to migrate the functions which all 3 test classe…
tolaa001 Oct 2, 2019
31d6836
moved TestTimezone class to test_timezones.py and relevant imports
tolaa001 Oct 2, 2019
e3242f4
moved HDFComplexvalues class to test_hdf_complex_values.py and releva…
tolaa001 Oct 2, 2019
1985c96
updated common.py, updated imports in test_hdf_complex_values and cre…
tolaa001 Oct 2, 2019
e93f3df
removed tables variable as not required for Timezone class
tolaa001 Oct 2, 2019
80d6aaf
cleaned up unused imports
tolaa001 Oct 2, 2019
7f0842b
updated import for ensure_clean_path to reference common.py instead
tolaa001 Oct 2, 2019
882ba8e
deleted test_pytables.py after splitting contents across 4 python files
tolaa001 Oct 2, 2019
54483fb
created test_hdf_store module to move class TestHDFStore
tolaa001 Oct 2, 2019
cd6a2c9
Ran black formatting
tolaa001 Oct 2, 2019
eb14b23
amended scope for setup_mode to module as the future state of test mo…
tolaa001 Oct 2, 2019
7b640fc
running isort to import in the right order
tolaa001 Oct 2, 2019
44c56f1
moving tables from common.py to the modules requiring this
tolaa001 Oct 2, 2019
595670e
renaming test files to make them sexier
tolaa001 Oct 2, 2019
515825a
copied pytest.importorskip to timezone
tolaa001 Oct 2, 2019
6106a24
removed test_timezone class and kept module functional
tolaa001 Oct 2, 2019
8f540a7
removed test_hdfcomplexvalues class and kept module functional
tolaa001 Oct 2, 2019
cfdd437
convert test_store back to class to make git diff easier for review
tolaa001 Oct 2, 2019
6375977
Merge remote-tracking branch 'upstream/master' into test_pytables_ref…
tolaa001 Oct 2, 2019
aa97d78
moved xfail_non_writeable into _test_decorators
tolaa001 Oct 2, 2019
9931352
removed numpy from safe import
tolaa001 Oct 2, 2019
27d62e4
importing numpy instead of using safe import
tolaa001 Oct 3, 2019
2e83dfd
Merge remote-tracking branch 'upstream/master' into test_pytables_ref…
tolaa001 Oct 3, 2019
eeac6ce
removing unused imports
tolaa001 Oct 3, 2019
78c710b
added tables to xfail decorator
tolaa001 Oct 3, 2019
a38bc5e
changed import for decorator
tolaa001 Oct 3, 2019
6d97676
reverted accidentally changes to test_operator.py
tolaa001 Oct 3, 2019
64ec41d
Merge remote-tracking branch 'upstream/master' into test_pytables_ref…
tolaa001 Oct 3, 2019
49e9d31
moved tables to common.py
tolaa001 Oct 4, 2019
e5cecce
fixed import
tolaa001 Oct 4, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions pandas/tests/io/pytables/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from contextlib import contextmanager
import os
import tempfile

from pandas.io.pytables import HDFStore


def safe_remove(path):
if path is not None:
try:
os.remove(path)
except OSError:
pass


def safe_close(store):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in a future PR, we want to have doc-strings & typing for these

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

type errors in tests directories are not reported by mypy

pandas/setup.cfg

Lines 133 to 134 in 75c9783

[mypy-pandas.conftest,pandas.tests.*]
ignore_errors=True

so should typing be added? does help grok functions but equally if not correct (since not checked) could be confusing.

try:
if store is not None:
store.close()
except IOError:
pass


def create_tempfile(path):
""" create an unopened named temporary file """
return os.path.join(tempfile.gettempdir(), path)


# contextmanager to ensure the file cleanup
@contextmanager
def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32=False):

try:

# put in the temporary path if we don't have one already
if not len(os.path.dirname(path)):
path = create_tempfile(path)

store = HDFStore(
path, mode=mode, complevel=complevel, complib=complib, fletcher32=False
)
yield store
finally:
safe_close(store)
if mode == "w" or mode == "a":
safe_remove(path)


@contextmanager
def ensure_clean_path(path):
"""
return essentially a named temporary file that is not opened
and deleted on exiting; if path is a list, then create and
return list of filenames
"""
try:
if isinstance(path, list):
filenames = [create_tempfile(p) for p in path]
yield filenames
else:
filenames = [create_tempfile(path)]
yield filenames[0]
finally:
for f in filenames:
safe_remove(f)


def _maybe_remove(store, key):
"""For tests using tables, try removing the table to be sure there is
no content from previous tests using the same table name."""
try:
store.remove(key)
except (ValueError, KeyError):
pass
17 changes: 17 additions & 0 deletions pandas/tests/io/pytables/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pytest

import pandas.util.testing as tm


@pytest.fixture
def setup_path():
"""Fixture for setup path"""
return "tmp.__{}__.h5".format(tm.rands(10))


@pytest.fixture(scope="module", autouse=True)
def setup_mode():
""" Reset testing mode fixture"""
tm.reset_testing_mode()
yield
tm.set_testing_mode()
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_compat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

import pandas as pd
from pandas.tests.io.pytables.test_pytables import ensure_clean_path
from pandas.tests.io.pytables.common import ensure_clean_path
from pandas.util.testing import assert_frame_equal

tables = pytest.importorskip("tables")
Expand Down
193 changes: 193 additions & 0 deletions pandas/tests/io/pytables/test_complex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
from warnings import catch_warnings

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, Series
from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal

from pandas.io.pytables import read_hdf

tables = pytest.importorskip("tables")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right can you move these to common.py (the tables.parameters.*) that are repeated across files.

you can also move the pytest.importorskip to common.py as well

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But this would mean having to import pytest in your common module, is this something you'd be ok with?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback would this be acceptable?

diff --git a/pandas/tests/io/pytables/conftest.py b/pandas/tests/io/pytables/conftest.py
index 6164f5d07..db00b1e33 100644
--- a/pandas/tests/io/pytables/conftest.py
+++ b/pandas/tests/io/pytables/conftest.py
@@ -15,3 +15,14 @@ def setup_mode():
     tm.reset_testing_mode()
     yield
     tm.set_testing_mode()
+
+
+@pytest.fixture(scope="module", autouse=True)
+def set_parameters():
+    """
+    Set pytables parameters so we don't have file sharing.
+    """
+    tables = pytest.importorskip("tables")
+    tables.parameters.MAX_NUMEXPR_THREADS = 1
+    tables.parameters.MAX_BLOSC_THREADS = 1
+    tables.parameters.MAX_THREADS = 1
diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py
index 96c4a3042..37739b1fd 100644
--- a/pandas/tests/io/pytables/test_complex.py
+++ b/pandas/tests/io/pytables/test_complex.py
@@ -14,10 +14,6 @@ from pandas.util.testing import assert_frame_equal
 from pandas.io.pytables import read_hdf
 
 tables = pytest.importorskip("tables")
-# set these parameters so we don't have file sharing
-tables.parameters.MAX_NUMEXPR_THREADS = 1
-tables.parameters.MAX_BLOSC_THREADS = 1
-tables.parameters.MAX_THREADS = 1
 
 
 # GH10447
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 97fd89903..e31cb0cf6 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -56,10 +56,6 @@ from pandas.io.pytables import TableIterator  # noqa: E402 isort:skip
 
 
 tables = pytest.importorskip("tables")
-# set these parameters so we don't have file sharing
-tables.parameters.MAX_NUMEXPR_THREADS = 1
-tables.parameters.MAX_BLOSC_THREADS = 1
-tables.parameters.MAX_THREADS = 1
 
 
 _default_compressor = "blosc"
diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py
index 3e91c6381..b18fdc7b6 100644
--- a/pandas/tests/io/pytables/test_timezones.py
+++ b/pandas/tests/io/pytables/test_timezones.py
@@ -16,10 +16,6 @@ import pandas.util.testing as tm
 from pandas.util.testing import assert_frame_equal, set_timezone
 
 tables = pytest.importorskip("tables")
-# set these parameters so we don't have file sharing
-tables.parameters.MAX_NUMEXPR_THREADS = 1
-tables.parameters.MAX_BLOSC_THREADS = 1
-tables.parameters.MAX_THREADS = 1
 
 
 def _compare_with_tz(a, b):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the other alternative is to monkeypatch..

@pytest.fixture(autouse=True)
def set_parameters(monkeypatch):
    """
    Set pytables parameters so we don't have file sharing.
    """
    monkeypatch.setattr("tables.parameters.MAX_NUMEXPR_THREADS", 1, raising=False)
    monkeypatch.setattr("tables.parameters.MAX_BLOSC_THREADS", 1, raising=False)
    monkeypatch.setattr("tables.parameters.MAX_THREADS", 1, raising=False)

but this needs to be a function scoped fixture, see pytest-dev/pytest#363

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in theory it would be better as a fixture but just trying to avoid code duplication

this was always somewhat leaky because it globally patches

it could be nicer but can do later

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok. but IMO common.py should not have pytest imported.

also, some of the functions in common could be fixturized (and maybe eventually remove common.py)

i'm not convinced from the pytest docs that pytest.importorskip should be used outside the module, but it seems to work.

but we need to merge ASAP to avoid merge conflicts, so i'll not block this further.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so what is the verdict on tables for this PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so what is the verdict on tables for this PR?

see #28746 (comment)

# set these parameters so we don't have file sharing
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1


# GH10447


def test_complex_fixed(setup_path):
df = DataFrame(
np.random.rand(4, 5).astype(np.complex64),
index=list("abcd"),
columns=list("ABCDE"),
)

with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df")
reread = read_hdf(path, "df")
assert_frame_equal(df, reread)

df = DataFrame(
np.random.rand(4, 5).astype(np.complex128),
index=list("abcd"),
columns=list("ABCDE"),
)
with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df")
reread = read_hdf(path, "df")
assert_frame_equal(df, reread)


def test_complex_table(setup_path):
df = DataFrame(
np.random.rand(4, 5).astype(np.complex64),
index=list("abcd"),
columns=list("ABCDE"),
)

with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df", format="table")
reread = read_hdf(path, "df")
assert_frame_equal(df, reread)

df = DataFrame(
np.random.rand(4, 5).astype(np.complex128),
index=list("abcd"),
columns=list("ABCDE"),
)

with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df", format="table", mode="w")
reread = read_hdf(path, "df")
assert_frame_equal(df, reread)


@td.xfail_non_writeable
def test_complex_mixed_fixed(setup_path):
complex64 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
)
complex128 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
)
df = DataFrame(
{
"A": [1, 2, 3, 4],
"B": ["a", "b", "c", "d"],
"C": complex64,
"D": complex128,
"E": [1.0, 2.0, 3.0, 4.0],
},
index=list("abcd"),
)
with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df")
reread = read_hdf(path, "df")
assert_frame_equal(df, reread)


def test_complex_mixed_table(setup_path):
complex64 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
)
complex128 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
)
df = DataFrame(
{
"A": [1, 2, 3, 4],
"B": ["a", "b", "c", "d"],
"C": complex64,
"D": complex128,
"E": [1.0, 2.0, 3.0, 4.0],
},
index=list("abcd"),
)

with ensure_clean_store(setup_path) as store:
store.append("df", df, data_columns=["A", "B"])
result = store.select("df", where="A>2")
assert_frame_equal(df.loc[df.A > 2], result)

with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df", format="table")
reread = read_hdf(path, "df")
assert_frame_equal(df, reread)


def test_complex_across_dimensions_fixed(setup_path):
with catch_warnings(record=True):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list("abcd"))
df = DataFrame({"A": s, "B": s})

objs = [s, df]
comps = [tm.assert_series_equal, tm.assert_frame_equal]
for obj, comp in zip(objs, comps):
with ensure_clean_path(setup_path) as path:
obj.to_hdf(path, "obj", format="fixed")
reread = read_hdf(path, "obj")
comp(obj, reread)


def test_complex_across_dimensions(setup_path):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list("abcd"))
df = DataFrame({"A": s, "B": s})

with catch_warnings(record=True):

objs = [df]
comps = [tm.assert_frame_equal]
for obj, comp in zip(objs, comps):
with ensure_clean_path(setup_path) as path:
obj.to_hdf(path, "obj", format="table")
reread = read_hdf(path, "obj")
comp(obj, reread)


def test_complex_indexing_error(setup_path):
complex128 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
)
df = DataFrame(
{"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128},
index=list("abcd"),
)
with ensure_clean_store(setup_path) as store:
with pytest.raises(TypeError):
store.append("df", df, data_columns=["C"])


def test_complex_series_error(setup_path):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list("abcd"))

with ensure_clean_path(setup_path) as path:
with pytest.raises(TypeError):
s.to_hdf(path, "obj", format="t")

with ensure_clean_path(setup_path) as path:
s.to_hdf(path, "obj", format="t", index=False)
reread = read_hdf(path, "obj")
tm.assert_series_equal(s, reread)


def test_complex_append(setup_path):
df = DataFrame(
{"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)}
)

with ensure_clean_store(setup_path) as store:
store.append("df", df, data_columns=["b"])
store.append("df", df)
result = store.select("df")
assert_frame_equal(pd.concat([df, df], 0), result)
Loading