diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py new file mode 100644 index 0000000000000..d06f467760518 --- /dev/null +++ b/pandas/tests/io/pytables/common.py @@ -0,0 +1,82 @@ +from contextlib import contextmanager +import os +import tempfile + +import pytest + +from pandas.io.pytables import HDFStore + +tables = pytest.importorskip("tables") +# set these parameters so we don't have file sharing +tables.parameters.MAX_NUMEXPR_THREADS = 1 +tables.parameters.MAX_BLOSC_THREADS = 1 +tables.parameters.MAX_THREADS = 1 + + +def safe_remove(path): + if path is not None: + try: + os.remove(path) + except OSError: + pass + + +def safe_close(store): + try: + if store is not None: + store.close() + except IOError: + pass + + +def create_tempfile(path): + """ create an unopened named temporary file """ + return os.path.join(tempfile.gettempdir(), path) + + +# contextmanager to ensure the file cleanup +@contextmanager +def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32=False): + store = None  # pre-bind so the finally clause is safe if HDFStore() raises + try: + + # put in the temporary path if we don't have one already + if not len(os.path.dirname(path)): + path = create_tempfile(path) + + store = HDFStore( + path, mode=mode, complevel=complevel, complib=complib, fletcher32=fletcher32 + ) + yield store + finally: + safe_close(store) + if mode == "w" or mode == "a": + safe_remove(path) + + +@contextmanager +def ensure_clean_path(path): + """ + return essentially a named temporary file that is not opened + and deleted on exiting; if path is a list, then create and + return list of filenames + """ + try: + if isinstance(path, list): + filenames = [create_tempfile(p) for p in path] + yield filenames + else: + filenames = [create_tempfile(path)] + yield filenames[0] + finally: + for f in filenames: + safe_remove(f) + + +def _maybe_remove(store, key): + """For tests using tables, try removing the table to be sure there is + no content from previous tests using the same table name.""" + try: + 
store.remove(key) + except (ValueError, KeyError): + pass diff --git a/pandas/tests/io/pytables/conftest.py b/pandas/tests/io/pytables/conftest.py new file mode 100644 index 0000000000000..6164f5d0722cc --- /dev/null +++ b/pandas/tests/io/pytables/conftest.py @@ -0,0 +1,17 @@ +import pytest + +import pandas.util.testing as tm + + +@pytest.fixture +def setup_path(): + """Fixture for setup path""" + return "tmp.__{}__.h5".format(tm.rands(10)) + + +@pytest.fixture(scope="module", autouse=True) +def setup_mode(): + """ Reset testing mode fixture""" + tm.reset_testing_mode() + yield + tm.set_testing_mode() diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index f5f73beab6d60..fe8d8c56a4e82 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -1,7 +1,7 @@ import pytest import pandas as pd -from pandas.tests.io.pytables.test_pytables import ensure_clean_path +from pandas.tests.io.pytables.common import ensure_clean_path from pandas.util.testing import assert_frame_equal tables = pytest.importorskip("tables") diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py new file mode 100644 index 0000000000000..e48cfb724ef1d --- /dev/null +++ b/pandas/tests/io/pytables/test_complex.py @@ -0,0 +1,186 @@ +from warnings import catch_warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Series +from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store +import pandas.util.testing as tm +from pandas.util.testing import assert_frame_equal + +from pandas.io.pytables import read_hdf + +# GH10447 + + +def test_complex_fixed(setup_path): + df = DataFrame( + np.random.rand(4, 5).astype(np.complex64), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = 
read_hdf(path, "df") + assert_frame_equal(df, reread) + + df = DataFrame( + np.random.rand(4, 5).astype(np.complex128), + index=list("abcd"), + columns=list("ABCDE"), + ) + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + assert_frame_equal(df, reread) + + +def test_complex_table(setup_path): + df = DataFrame( + np.random.rand(4, 5).astype(np.complex64), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") + assert_frame_equal(df, reread) + + df = DataFrame( + np.random.rand(4, 5).astype(np.complex128), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table", mode="w") + reread = read_hdf(path, "df") + assert_frame_equal(df, reread) + + +@td.xfail_non_writeable +def test_complex_mixed_fixed(setup_path): + complex64 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 + ) + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "d"], + "C": complex64, + "D": complex128, + "E": [1.0, 2.0, 3.0, 4.0], + }, + index=list("abcd"), + ) + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + assert_frame_equal(df, reread) + + +def test_complex_mixed_table(setup_path): + complex64 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 + ) + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "d"], + "C": complex64, + "D": complex128, + "E": [1.0, 2.0, 3.0, 4.0], + }, + index=list("abcd"), + ) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=["A", "B"]) + result = 
store.select("df", where="A>2") + assert_frame_equal(df.loc[df.A > 2], result) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") + assert_frame_equal(df, reread) + + +def test_complex_across_dimensions_fixed(setup_path): + with catch_warnings(record=True): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list("abcd")) + df = DataFrame({"A": s, "B": s}) + + objs = [s, df] + comps = [tm.assert_series_equal, tm.assert_frame_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(setup_path) as path: + obj.to_hdf(path, "obj", format="fixed") + reread = read_hdf(path, "obj") + comp(obj, reread) + + +def test_complex_across_dimensions(setup_path): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list("abcd")) + df = DataFrame({"A": s, "B": s}) + + with catch_warnings(record=True): + + objs = [df] + comps = [tm.assert_frame_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(setup_path) as path: + obj.to_hdf(path, "obj", format="table") + reread = read_hdf(path, "obj") + comp(obj, reread) + + +def test_complex_indexing_error(setup_path): + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128}, + index=list("abcd"), + ) + with ensure_clean_store(setup_path) as store: + with pytest.raises(TypeError): + store.append("df", df, data_columns=["C"]) + + +def test_complex_series_error(setup_path): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list("abcd")) + + with ensure_clean_path(setup_path) as path: + with pytest.raises(TypeError): + s.to_hdf(path, "obj", format="t") + + with ensure_clean_path(setup_path) as path: + s.to_hdf(path, "obj", format="t", index=False) + reread = 
read_hdf(path, "obj") + tm.assert_series_equal(s, reread) + + +def test_complex_append(setup_path): + df = DataFrame( + {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)} + ) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=["b"]) + store.append("df", df) + result = store.select("df") + assert_frame_equal(pd.concat([df, df], 0), result) diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_store.py similarity index 88% rename from pandas/tests/io/pytables/test_pytables.py rename to pandas/tests/io/pytables/test_store.py index 46d8ef04dd8e5..140ee5082f55d 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1,11 +1,9 @@ -from contextlib import contextmanager import datetime from datetime import timedelta from distutils.version import LooseVersion from io import BytesIO import os import re -import tempfile from warnings import catch_warnings, simplefilter import numpy as np @@ -34,8 +32,17 @@ isna, timedelta_range, ) +from pandas.tests.io.pytables.common import ( + _maybe_remove, + create_tempfile, + ensure_clean_path, + ensure_clean_store, + safe_close, + safe_remove, + tables, +) import pandas.util.testing as tm -from pandas.util.testing import assert_frame_equal, assert_series_equal, set_timezone +from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas.io.pytables import ( ClosedFileError, @@ -48,118 +55,12 @@ from pandas.io import pytables as pytables # noqa: E402 isort:skip from pandas.io.pytables import TableIterator # noqa: E402 isort:skip -tables = pytest.importorskip("tables") - - -@pytest.fixture -def setup_path(): - """Fixture for setup path""" - return "tmp.__{}__.h5".format(tm.rands(10)) - - -@pytest.fixture(scope="class", autouse=True) -def setup_mode(): - tm.reset_testing_mode() - yield - tm.set_testing_mode() - - -# TODO: -# remove when gh-24839 is fixed; this affects numpy 
1.16 -# and pytables 3.4.4 -xfail_non_writeable = pytest.mark.xfail( - LooseVersion(np.__version__) >= LooseVersion("1.16") - and LooseVersion(tables.__version__) < LooseVersion("3.5.1"), - reason=( - "gh-25511, gh-24839. pytables needs a " - "release beyong 3.4.4 to support numpy 1.16x" - ), -) - _default_compressor = "blosc" - - ignore_natural_naming_warning = pytest.mark.filterwarnings( "ignore:object name:tables.exceptions.NaturalNameWarning" ) -# contextmanager to ensure the file cleanup - - -def safe_remove(path): - if path is not None: - try: - os.remove(path) - except OSError: - pass - - -def safe_close(store): - try: - if store is not None: - store.close() - except IOError: - pass - - -def create_tempfile(path): - """ create an unopened named temporary file """ - return os.path.join(tempfile.gettempdir(), path) - - -@contextmanager -def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32=False): - - try: - - # put in the temporary path if we don't have one already - if not len(os.path.dirname(path)): - path = create_tempfile(path) - - store = HDFStore( - path, mode=mode, complevel=complevel, complib=complib, fletcher32=False - ) - yield store - finally: - safe_close(store) - if mode == "w" or mode == "a": - safe_remove(path) - - -@contextmanager -def ensure_clean_path(path): - """ - return essentially a named temporary file that is not opened - and deleted on exiting; if path is a list, then create and - return list of filenames - """ - try: - if isinstance(path, list): - filenames = [create_tempfile(p) for p in path] - yield filenames - else: - filenames = [create_tempfile(path)] - yield filenames[0] - finally: - for f in filenames: - safe_remove(f) - - -# set these parameters so we don't have file sharing -tables.parameters.MAX_NUMEXPR_THREADS = 1 -tables.parameters.MAX_BLOSC_THREADS = 1 -tables.parameters.MAX_THREADS = 1 - - -def _maybe_remove(store, key): - """For tests using tables, try removing the table to be sure there is - 
no content from previous tests using the same table name.""" - try: - store.remove(key) - except (ValueError, KeyError): - pass - @pytest.mark.single class TestHDFStore: @@ -904,7 +805,7 @@ def test_put_integer(self, setup_path): df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal, setup_path) - @xfail_non_writeable + @td.xfail_non_writeable def test_put_mixed_type(self, setup_path): df = tm.makeTimeDataFrame() df["obj1"] = "foo" @@ -1507,7 +1408,7 @@ def test_to_hdf_with_min_itemsize(self, setup_path): ) @pytest.mark.parametrize( - "format", [pytest.param("fixed", marks=xfail_non_writeable), "table"] + "format", [pytest.param("fixed", marks=td.xfail_non_writeable), "table"] ) def test_to_hdf_errors(self, format, setup_path): @@ -1904,7 +1805,7 @@ def test_pass_spec_to_storer(self, setup_path): with pytest.raises(TypeError): store.select("df", where=[("columns=A")]) - @xfail_non_writeable + @td.xfail_non_writeable def test_append_misc(self, setup_path): with ensure_clean_store(setup_path) as store: @@ -2112,7 +2013,7 @@ def test_unimplemented_dtypes_table_columns(self, setup_path): with pytest.raises(TypeError): store.append("df_unimplemented", df) - @xfail_non_writeable + @td.xfail_non_writeable @pytest.mark.skipif( LooseVersion(np.__version__) == LooseVersion("1.15.0"), reason=( @@ -2347,7 +2248,7 @@ def test_float_index(self, setup_path): s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) - @xfail_non_writeable + @td.xfail_non_writeable def test_tuple_index(self, setup_path): # GH #492 @@ -2360,7 +2261,7 @@ def test_tuple_index(self, setup_path): simplefilter("ignore", pd.errors.PerformanceWarning) self._check_roundtrip(DF, tm.assert_frame_equal, path=setup_path) - @xfail_non_writeable + @td.xfail_non_writeable @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") def test_index_types(self, setup_path): @@ -2424,7 +2325,7 @@ def 
test_timeseries_preepoch(self, setup_path): except OverflowError: pytest.skip("known failer on some windows platforms") - @xfail_non_writeable + @td.xfail_non_writeable @pytest.mark.parametrize( "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] ) @@ -2458,7 +2359,7 @@ def test_frame(self, compression, setup_path): # empty self._check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path) - @xfail_non_writeable + @td.xfail_non_writeable def test_empty_series_frame(self, setup_path): s0 = Series() s1 = Series(name="myseries") @@ -2472,7 +2373,7 @@ def test_empty_series_frame(self, setup_path): self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) - @xfail_non_writeable + @td.xfail_non_writeable @pytest.mark.parametrize( "dtype", [np.int64, np.float64, np.object, "m8[ns]", "M8[ns]"] ) @@ -2558,7 +2459,7 @@ def test_store_series_name(self, setup_path): recons = store["series"] tm.assert_series_equal(recons, series) - @xfail_non_writeable + @td.xfail_non_writeable @pytest.mark.parametrize( "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] ) @@ -4116,7 +4017,7 @@ def test_pytables_native2_read(self, datapath, setup_path): d1 = store["detector"] assert isinstance(d1, DataFrame) - @xfail_non_writeable + @td.xfail_non_writeable def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path): # GH 24510 # legacy table with fixed format written in Python 2 @@ -4275,7 +4176,7 @@ def test_unicode_longer_encoded(self, setup_path): result = store.get("df") tm.assert_frame_equal(result, df) - @xfail_non_writeable + @td.xfail_non_writeable def test_store_datetime_mixed(self, setup_path): df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]}) @@ -4855,536 +4756,3 @@ def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path): with ensure_clean_path(setup_path) as path: with pytest.raises(NotImplementedError, 
match="Saving a MultiIndex"): df.to_hdf(path, "df") - - -class TestHDFComplexValues: - # GH10447 - - def test_complex_fixed(self, setup_path): - df = DataFrame( - np.random.rand(4, 5).astype(np.complex64), - index=list("abcd"), - columns=list("ABCDE"), - ) - - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df") - reread = read_hdf(path, "df") - assert_frame_equal(df, reread) - - df = DataFrame( - np.random.rand(4, 5).astype(np.complex128), - index=list("abcd"), - columns=list("ABCDE"), - ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df") - reread = read_hdf(path, "df") - assert_frame_equal(df, reread) - - def test_complex_table(self, setup_path): - df = DataFrame( - np.random.rand(4, 5).astype(np.complex64), - index=list("abcd"), - columns=list("ABCDE"), - ) - - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table") - reread = read_hdf(path, "df") - assert_frame_equal(df, reread) - - df = DataFrame( - np.random.rand(4, 5).astype(np.complex128), - index=list("abcd"), - columns=list("ABCDE"), - ) - - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table", mode="w") - reread = read_hdf(path, "df") - assert_frame_equal(df, reread) - - @xfail_non_writeable - def test_complex_mixed_fixed(self, setup_path): - complex64 = np.array( - [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 - ) - complex128 = np.array( - [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 - ) - df = DataFrame( - { - "A": [1, 2, 3, 4], - "B": ["a", "b", "c", "d"], - "C": complex64, - "D": complex128, - "E": [1.0, 2.0, 3.0, 4.0], - }, - index=list("abcd"), - ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df") - reread = read_hdf(path, "df") - assert_frame_equal(df, reread) - - def test_complex_mixed_table(self, setup_path): - complex64 = np.array( - [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 - ) - complex128 = np.array( - 
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 - ) - df = DataFrame( - { - "A": [1, 2, 3, 4], - "B": ["a", "b", "c", "d"], - "C": complex64, - "D": complex128, - "E": [1.0, 2.0, 3.0, 4.0], - }, - index=list("abcd"), - ) - - with ensure_clean_store(setup_path) as store: - store.append("df", df, data_columns=["A", "B"]) - result = store.select("df", where="A>2") - assert_frame_equal(df.loc[df.A > 2], result) - - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table") - reread = read_hdf(path, "df") - assert_frame_equal(df, reread) - - def test_complex_across_dimensions_fixed(self, setup_path): - with catch_warnings(record=True): - complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) - s = Series(complex128, index=list("abcd")) - df = DataFrame({"A": s, "B": s}) - - objs = [s, df] - comps = [tm.assert_series_equal, tm.assert_frame_equal] - for obj, comp in zip(objs, comps): - with ensure_clean_path(setup_path) as path: - obj.to_hdf(path, "obj", format="fixed") - reread = read_hdf(path, "obj") - comp(obj, reread) - - def test_complex_across_dimensions(self, setup_path): - complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) - s = Series(complex128, index=list("abcd")) - df = DataFrame({"A": s, "B": s}) - - with catch_warnings(record=True): - - objs = [df] - comps = [tm.assert_frame_equal] - for obj, comp in zip(objs, comps): - with ensure_clean_path(setup_path) as path: - obj.to_hdf(path, "obj", format="table") - reread = read_hdf(path, "obj") - comp(obj, reread) - - def test_complex_indexing_error(self, setup_path): - complex128 = np.array( - [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 - ) - df = DataFrame( - {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128}, - index=list("abcd"), - ) - with ensure_clean_store(setup_path) as store: - with pytest.raises(TypeError): - store.append("df", df, data_columns=["C"]) - - def 
test_complex_series_error(self, setup_path): - complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) - s = Series(complex128, index=list("abcd")) - - with ensure_clean_path(setup_path) as path: - with pytest.raises(TypeError): - s.to_hdf(path, "obj", format="t") - - with ensure_clean_path(setup_path) as path: - s.to_hdf(path, "obj", format="t", index=False) - reread = read_hdf(path, "obj") - tm.assert_series_equal(s, reread) - - def test_complex_append(self, setup_path): - df = DataFrame( - {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)} - ) - - with ensure_clean_store(setup_path) as store: - store.append("df", df, data_columns=["b"]) - store.append("df", df) - result = store.select("df") - assert_frame_equal(pd.concat([df, df], 0), result) - - -# @pytest.mark.usefixtures("setup_path") -class TestTimezones: - def _compare_with_tz(self, a, b): - tm.assert_frame_equal(a, b) - - # compare the zones on each element - for c in a.columns: - for i in a.index: - a_e = a.loc[i, c] - b_e = b.loc[i, c] - if not (a_e == b_e and a_e.tz == b_e.tz): - raise AssertionError( - "invalid tz comparison [{a_e}] [{b_e}]".format(a_e=a_e, b_e=b_e) - ) - - def test_append_with_timezones_dateutil(self, setup_path): - - from datetime import timedelta - - # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows - # filename issues. 
- from pandas._libs.tslibs.timezones import maybe_get_tz - - gettz = lambda x: maybe_get_tz("dateutil/" + x) - - # as columns - with ensure_clean_store(setup_path) as store: - - _maybe_remove(store, "df_tz") - df = DataFrame( - dict( - A=[ - Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")) - + timedelta(hours=1) * i - for i in range(5) - ] - ) - ) - - store.append("df_tz", df, data_columns=["A"]) - result = store["df_tz"] - self._compare_with_tz(result, df) - assert_frame_equal(result, df) - - # select with tz aware - expected = df[df.A >= df.A[3]] - result = store.select("df_tz", where="A>=df.A[3]") - self._compare_with_tz(result, expected) - - # ensure we include dates in DST and STD time here. - _maybe_remove(store, "df_tz") - df = DataFrame( - dict( - A=Timestamp("20130102", tz=gettz("US/Eastern")), - B=Timestamp("20130603", tz=gettz("US/Eastern")), - ), - index=range(5), - ) - store.append("df_tz", df) - result = store["df_tz"] - self._compare_with_tz(result, df) - assert_frame_equal(result, df) - - df = DataFrame( - dict( - A=Timestamp("20130102", tz=gettz("US/Eastern")), - B=Timestamp("20130102", tz=gettz("EET")), - ), - index=range(5), - ) - with pytest.raises(ValueError): - store.append("df_tz", df) - - # this is ok - _maybe_remove(store, "df_tz") - store.append("df_tz", df, data_columns=["A", "B"]) - result = store["df_tz"] - self._compare_with_tz(result, df) - assert_frame_equal(result, df) - - # can't append with diff timezone - df = DataFrame( - dict( - A=Timestamp("20130102", tz=gettz("US/Eastern")), - B=Timestamp("20130102", tz=gettz("CET")), - ), - index=range(5), - ) - with pytest.raises(ValueError): - store.append("df_tz", df) - - # as index - with ensure_clean_store(setup_path) as store: - - # GH 4098 example - df = DataFrame( - dict( - A=Series( - range(3), - index=date_range( - "2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern") - ), - ) - ) - ) - - _maybe_remove(store, "df") - store.put("df", df) - result = store.select("df") - 
assert_frame_equal(result, df) - - _maybe_remove(store, "df") - store.append("df", df) - result = store.select("df") - assert_frame_equal(result, df) - - def test_append_with_timezones_pytz(self, setup_path): - - from datetime import timedelta - - # as columns - with ensure_clean_store(setup_path) as store: - - _maybe_remove(store, "df_tz") - df = DataFrame( - dict( - A=[ - Timestamp("20130102 2:00:00", tz="US/Eastern") - + timedelta(hours=1) * i - for i in range(5) - ] - ) - ) - store.append("df_tz", df, data_columns=["A"]) - result = store["df_tz"] - self._compare_with_tz(result, df) - assert_frame_equal(result, df) - - # select with tz aware - self._compare_with_tz( - store.select("df_tz", where="A>=df.A[3]"), df[df.A >= df.A[3]] - ) - - _maybe_remove(store, "df_tz") - # ensure we include dates in DST and STD time here. - df = DataFrame( - dict( - A=Timestamp("20130102", tz="US/Eastern"), - B=Timestamp("20130603", tz="US/Eastern"), - ), - index=range(5), - ) - store.append("df_tz", df) - result = store["df_tz"] - self._compare_with_tz(result, df) - assert_frame_equal(result, df) - - df = DataFrame( - dict( - A=Timestamp("20130102", tz="US/Eastern"), - B=Timestamp("20130102", tz="EET"), - ), - index=range(5), - ) - with pytest.raises(ValueError): - store.append("df_tz", df) - - # this is ok - _maybe_remove(store, "df_tz") - store.append("df_tz", df, data_columns=["A", "B"]) - result = store["df_tz"] - self._compare_with_tz(result, df) - assert_frame_equal(result, df) - - # can't append with diff timezone - df = DataFrame( - dict( - A=Timestamp("20130102", tz="US/Eastern"), - B=Timestamp("20130102", tz="CET"), - ), - index=range(5), - ) - with pytest.raises(ValueError): - store.append("df_tz", df) - - # as index - with ensure_clean_store(setup_path) as store: - - # GH 4098 example - df = DataFrame( - dict( - A=Series( - range(3), - index=date_range( - "2000-1-1", periods=3, freq="H", tz="US/Eastern" - ), - ) - ) - ) - - _maybe_remove(store, "df") - store.put("df", 
df) - result = store.select("df") - assert_frame_equal(result, df) - - _maybe_remove(store, "df") - store.append("df", df) - result = store.select("df") - assert_frame_equal(result, df) - - def test_tseries_select_index_column(self, setup_path): - # GH7777 - # selecting a UTC datetimeindex column did - # not preserve UTC tzinfo set before storing - - # check that no tz still works - rng = date_range("1/1/2000", "1/30/2000") - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - - with ensure_clean_store(setup_path) as store: - store.append("frame", frame) - result = store.select_column("frame", "index") - assert rng.tz == DatetimeIndex(result.values).tz - - # check utc - rng = date_range("1/1/2000", "1/30/2000", tz="UTC") - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - - with ensure_clean_store(setup_path) as store: - store.append("frame", frame) - result = store.select_column("frame", "index") - assert rng.tz == result.dt.tz - - # double check non-utc - rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - - with ensure_clean_store(setup_path) as store: - store.append("frame", frame) - result = store.select_column("frame", "index") - assert rng.tz == result.dt.tz - - def test_timezones_fixed(self, setup_path): - with ensure_clean_store(setup_path) as store: - - # index - rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - store["df"] = df - result = store["df"] - assert_frame_equal(result, df) - - # as data - # GH11411 - _maybe_remove(store, "df") - df = DataFrame( - { - "A": rng, - "B": rng.tz_convert("UTC").tz_localize(None), - "C": rng.tz_convert("CET"), - "D": range(len(rng)), - }, - index=rng, - ) - store["df"] = df - result = store["df"] - assert_frame_equal(result, df) - - def test_fixed_offset_tz(self, setup_path): - rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") - frame = 
DataFrame(np.random.randn(len(rng), 4), index=rng) - - with ensure_clean_store(setup_path) as store: - store["frame"] = frame - recons = store["frame"] - tm.assert_index_equal(recons.index, rng) - assert rng.tz == recons.index.tz - - @td.skip_if_windows - def test_store_timezone(self, setup_path): - # GH2852 - # issue storing datetime.date with a timezone as it resets when read - # back in a new timezone - - # original method - with ensure_clean_store(setup_path) as store: - - today = datetime.date(2013, 9, 10) - df = DataFrame([1, 2, 3], index=[today, today, today]) - store["obj1"] = df - result = store["obj1"] - assert_frame_equal(result, df) - - # with tz setting - with ensure_clean_store(setup_path) as store: - - with set_timezone("EST5EDT"): - today = datetime.date(2013, 9, 10) - df = DataFrame([1, 2, 3], index=[today, today, today]) - store["obj1"] = df - - with set_timezone("CST6CDT"): - result = store["obj1"] - - assert_frame_equal(result, df) - - def test_legacy_datetimetz_object(self, datapath, setup_path): - # legacy from < 0.17.0 - # 8260 - expected = DataFrame( - dict( - A=Timestamp("20130102", tz="US/Eastern"), - B=Timestamp("20130603", tz="CET"), - ), - index=range(5), - ) - with ensure_clean_store( - datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r" - ) as store: - result = store["df"] - assert_frame_equal(result, expected) - - def test_dst_transitions(self, setup_path): - # make sure we are not failing on transitions - with ensure_clean_store(setup_path) as store: - times = pd.date_range( - "2013-10-26 23:00", - "2013-10-27 01:00", - tz="Europe/London", - freq="H", - ambiguous="infer", - ) - - for i in [times, times + pd.Timedelta("10min")]: - _maybe_remove(store, "df") - df = DataFrame({"A": range(len(i)), "B": i}, index=i) - store.append("df", df) - result = store.select("df") - assert_frame_equal(result, df) - - def test_read_with_where_tz_aware_index(self, setup_path): - # GH 11926 - periods = 10 - dts = 
pd.date_range("20151201", periods=periods, freq="D", tz="UTC")
-        mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
-        expected = pd.DataFrame({"MYCOL": 0}, index=mi)
-
-        key = "mykey"
-        with ensure_clean_path(setup_path) as path:
-            with pd.HDFStore(path) as store:
-                store.append(key, expected, format="table", append=True)
-            result = pd.read_hdf(path, key, where="DATE > 20151130")
-            assert_frame_equal(result, expected)
-
-    def test_py2_created_with_datetimez(self, datapath, setup_path):
-        # The test HDF5 file was created in Python 2, but could not be read in
-        # Python 3.
-        #
-        # GH26443
-        index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]
-        expected = DataFrame({"data": 123}, index=index)
-        with ensure_clean_store(
-            datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
-        ) as store:
-            result = store["key"]
-            assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py
new file mode 100644
index 0000000000000..ba1df24224831
--- /dev/null
+++ b/pandas/tests/io/pytables/test_timezones.py
@@ -0,0 +1,398 @@
+import datetime
+
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+import pandas as pd
+from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range
+from pandas.tests.io.pytables.common import (
+    _maybe_remove,
+    ensure_clean_path,
+    ensure_clean_store,
+)
+import pandas.util.testing as tm
+from pandas.util.testing import assert_frame_equal, set_timezone
+
+
+def _compare_with_tz(a, b):
+    """Assert a equals b and that each pair of elements has the same tz."""
+    tm.assert_frame_equal(a, b)
+
+    # compare the zones on each element
+    for c in a.columns:
+        for i in a.index:
+            a_e = a.loc[i, c]
+            b_e = b.loc[i, c]
+            if not (a_e == b_e and a_e.tz == b_e.tz):
+                raise AssertionError(
+                    "invalid tz comparison [{a_e}] [{b_e}]".format(a_e=a_e, b_e=b_e)
+                )
+
+
+def test_append_with_timezones_dateutil(setup_path):
+    """Round-trip dateutil tz-aware data through append, as columns and as index."""
+
+    from datetime import timedelta
+
+    # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
+    # filename issues.
+    from pandas._libs.tslibs.timezones import maybe_get_tz
+
+    gettz = lambda x: maybe_get_tz("dateutil/" + x)
+
+    # as columns
+    with ensure_clean_store(setup_path) as store:
+
+        _maybe_remove(store, "df_tz")
+        df = DataFrame(
+            dict(
+                A=[
+                    Timestamp("20130102 2:00:00", tz=gettz("US/Eastern"))
+                    + timedelta(hours=1) * i
+                    for i in range(5)
+                ]
+            )
+        )
+
+        store.append("df_tz", df, data_columns=["A"])
+        result = store["df_tz"]
+        _compare_with_tz(result, df)
+        assert_frame_equal(result, df)
+
+        # select with tz aware
+        expected = df[df.A >= df.A[3]]
+        result = store.select("df_tz", where="A>=df.A[3]")
+        _compare_with_tz(result, expected)
+
+        # ensure we include dates in DST and STD time here.
+        _maybe_remove(store, "df_tz")
+        df = DataFrame(
+            dict(
+                A=Timestamp("20130102", tz=gettz("US/Eastern")),
+                B=Timestamp("20130603", tz=gettz("US/Eastern")),
+            ),
+            index=range(5),
+        )
+        store.append("df_tz", df)
+        result = store["df_tz"]
+        _compare_with_tz(result, df)
+        assert_frame_equal(result, df)
+
+        df = DataFrame(
+            dict(
+                A=Timestamp("20130102", tz=gettz("US/Eastern")),
+                B=Timestamp("20130102", tz=gettz("EET")),
+            ),
+            index=range(5),
+        )
+        with pytest.raises(ValueError):
+            store.append("df_tz", df)
+
+        # this is ok
+        _maybe_remove(store, "df_tz")
+        store.append("df_tz", df, data_columns=["A", "B"])
+        result = store["df_tz"]
+        _compare_with_tz(result, df)
+        assert_frame_equal(result, df)
+
+        # can't append with diff timezone
+        df = DataFrame(
+            dict(
+                A=Timestamp("20130102", tz=gettz("US/Eastern")),
+                B=Timestamp("20130102", tz=gettz("CET")),
+            ),
+            index=range(5),
+        )
+        with pytest.raises(ValueError):
+            store.append("df_tz", df)
+
+    # as index
+    with ensure_clean_store(setup_path) as store:
+
+        # GH 4098 example
+        df = DataFrame(
+            dict(
+                A=Series(
+                    range(3),
+                    index=date_range(
+                        "2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern")
+                    ),
+                )
+            )
+        )
+
+        _maybe_remove(store, "df")
+        store.put("df", df)
+        result = store.select("df")
+        assert_frame_equal(result, df)
+
+        _maybe_remove(store, "df")
+        store.append("df", df)
+        result = store.select("df")
+        assert_frame_equal(result, df)
+
+
+def test_append_with_timezones_pytz(setup_path):
+    """Round-trip tz-aware data (zones given by name) through append."""
+
+    from datetime import timedelta
+
+    # as columns
+    with ensure_clean_store(setup_path) as store:
+
+        _maybe_remove(store, "df_tz")
+        df = DataFrame(
+            dict(
+                A=[
+                    Timestamp("20130102 2:00:00", tz="US/Eastern")
+                    + timedelta(hours=1) * i
+                    for i in range(5)
+                ]
+            )
+        )
+        store.append("df_tz", df, data_columns=["A"])
+        result = store["df_tz"]
+        _compare_with_tz(result, df)
+        assert_frame_equal(result, df)
+
+        # select with tz aware
+        _compare_with_tz(store.select("df_tz", where="A>=df.A[3]"), df[df.A >= df.A[3]])
+
+        _maybe_remove(store, "df_tz")
+        # ensure we include dates in DST and STD time here.
+        df = DataFrame(
+            dict(
+                A=Timestamp("20130102", tz="US/Eastern"),
+                B=Timestamp("20130603", tz="US/Eastern"),
+            ),
+            index=range(5),
+        )
+        store.append("df_tz", df)
+        result = store["df_tz"]
+        _compare_with_tz(result, df)
+        assert_frame_equal(result, df)
+
+        df = DataFrame(
+            dict(
+                A=Timestamp("20130102", tz="US/Eastern"),
+                B=Timestamp("20130102", tz="EET"),
+            ),
+            index=range(5),
+        )
+        with pytest.raises(ValueError):
+            store.append("df_tz", df)
+
+        # this is ok
+        _maybe_remove(store, "df_tz")
+        store.append("df_tz", df, data_columns=["A", "B"])
+        result = store["df_tz"]
+        _compare_with_tz(result, df)
+        assert_frame_equal(result, df)
+
+        # can't append with diff timezone
+        df = DataFrame(
+            dict(
+                A=Timestamp("20130102", tz="US/Eastern"),
+                B=Timestamp("20130102", tz="CET"),
+            ),
+            index=range(5),
+        )
+        with pytest.raises(ValueError):
+            store.append("df_tz", df)
+
+    # as index
+    with ensure_clean_store(setup_path) as store:
+
+        # GH 4098 example
+        df = DataFrame(
+            dict(
+                A=Series(
+                    range(3),
+                    index=date_range("2000-1-1", periods=3, freq="H", tz="US/Eastern"),
+                )
+            )
+        )
+
+        _maybe_remove(store, "df")
+        store.put("df", df)
+        result = store.select("df")
+        assert_frame_equal(result, df)
+
+        _maybe_remove(store, "df")
+        store.append("df", df)
+        result = store.select("df")
+        assert_frame_equal(result, df)
+
+
+def test_tseries_select_index_column(setup_path):
+    """select_column on the index keeps the tz the stored index had (or none)."""
+    # GH7777
+    # selecting a UTC datetimeindex column did
+    # not preserve UTC tzinfo set before storing
+
+    # check that no tz still works
+    rng = date_range("1/1/2000", "1/30/2000")
+    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+    with ensure_clean_store(setup_path) as store:
+        store.append("frame", frame)
+        result = store.select_column("frame", "index")
+        assert rng.tz == DatetimeIndex(result.values).tz
+
+    # check utc
+    rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
+    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+    with ensure_clean_store(setup_path) as store:
+        store.append("frame", frame)
+        result = store.select_column("frame", "index")
+        assert rng.tz == result.dt.tz
+
+    # double check non-utc
+    rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
+    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+    with ensure_clean_store(setup_path) as store:
+        store.append("frame", frame)
+        result = store.select_column("frame", "index")
+        assert rng.tz == result.dt.tz
+
+
+def test_timezones_fixed(setup_path):
+    """A tz-aware index and tz-aware columns survive store[key] assignment."""
+    with ensure_clean_store(setup_path) as store:
+
+        # index
+        rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
+        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
+        store["df"] = df
+        result = store["df"]
+        assert_frame_equal(result, df)
+
+        # as data
+        # GH11411
+        _maybe_remove(store, "df")
+        df = DataFrame(
+            {
+                "A": rng,
+                "B": rng.tz_convert("UTC").tz_localize(None),
+                "C": rng.tz_convert("CET"),
+                "D": range(len(rng)),
+            },
+            index=rng,
+        )
+        store["df"] = df
+        result = store["df"]
+        assert_frame_equal(result, df)
+
+
+def test_fixed_offset_tz(setup_path):
+    """An index with a fixed -07:00 offset is read back with the same tz."""
+    rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
+    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+    with ensure_clean_store(setup_path) as store:
+        store["frame"] = frame
+        recons = store["frame"]
+        tm.assert_index_equal(recons.index, rng)
+        assert rng.tz == recons.index.tz
+
+
+@td.skip_if_windows
+def test_store_timezone(setup_path):
+    """datetime.date index labels are unchanged when the local timezone changes."""
+    # GH2852
+    # issue storing datetime.date with a timezone as it resets when read
+    # back in a new timezone
+
+    # original method
+    with ensure_clean_store(setup_path) as store:
+
+        today = datetime.date(2013, 9, 10)
+        df = DataFrame([1, 2, 3], index=[today, today, today])
+        store["obj1"] = df
+        result = store["obj1"]
+        assert_frame_equal(result, df)
+
+    # with tz setting
+    with ensure_clean_store(setup_path) as store:
+
+        with set_timezone("EST5EDT"):
+            today = datetime.date(2013, 9, 10)
+            df = DataFrame([1, 2, 3], index=[today, today, today])
+            store["obj1"] = df
+
+        with set_timezone("CST6CDT"):
+            result = store["obj1"]
+
+        assert_frame_equal(result, df)
+
+
+def test_legacy_datetimetz_object(datapath, setup_path):
+    """Read a legacy file (datetimetz_object.h5) holding tz-aware columns."""
+    # legacy from < 0.17.0
+    # 8260
+    expected = DataFrame(
+        dict(
+            A=Timestamp("20130102", tz="US/Eastern"), B=Timestamp("20130603", tz="CET")
+        ),
+        index=range(5),
+    )
+    with ensure_clean_store(
+        datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"
+    ) as store:
+        result = store["df"]
+        assert_frame_equal(result, expected)
+
+
+def test_dst_transitions(setup_path):
+    """Append and select data whose index and column cross a DST transition."""
+    # make sure we are not failing on transitions
+    with ensure_clean_store(setup_path) as store:
+        times = pd.date_range(
+            "2013-10-26 23:00",
+            "2013-10-27 01:00",
+            tz="Europe/London",
+            freq="H",
+            ambiguous="infer",
+        )
+
+        for i in [times, times + pd.Timedelta("10min")]:
+            _maybe_remove(store, "df")
+            df = DataFrame({"A": range(len(i)), "B": i}, index=i)
+            store.append("df", df)
+            result = store.select("df")
+            assert_frame_equal(result, df)
+
+
+def test_read_with_where_tz_aware_index(setup_path):
+    """read_hdf accepts a where clause on a tz-aware MultiIndex level."""
+    # GH 11926
+    periods = 10
+    dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC")
+    mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
+    expected = pd.DataFrame({"MYCOL": 0}, index=mi)
+
+    key = "mykey"
+    with ensure_clean_path(setup_path) as path:
+        with pd.HDFStore(path) as store:
+            store.append(key, expected, format="table", append=True)
+        result = pd.read_hdf(path, key, where="DATE > 20151130")
+        assert_frame_equal(result, expected)
+
+
+def test_py2_created_with_datetimez(datapath, setup_path):
+    """Read a tz-aware index from a file that was written under Python 2."""
+    # The test HDF5 file was created in Python 2, but could not be read in
+    # Python 3.
+    #
+    # GH26443
+    index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]
+    expected = DataFrame({"data": 123}, index=index)
+    with ensure_clean_store(
+        datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
+    ) as store:
+        result = store["key"]
+        assert_frame_equal(result, expected)
diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py
index 0e07b9f5fe9f7..c9fd426f68b48 100644
--- a/pandas/util/_test_decorators.py
+++ b/pandas/util/_test_decorators.py
@@ -27,6 +27,7 @@ def test_foo():
 import locale
 from typing import Callable, Optional
 
+import numpy as np
 import pytest
 
 from pandas.compat import is_platform_32bit, is_platform_windows
@@ -73,6 +74,21 @@ def safe_import(mod_name, min_version=None):
     return False
 
 
+# TODO:
+# remove when gh-24839 is fixed; this affects numpy 1.16
+# and pytables 3.4.4
+tables = safe_import("tables")
+xfail_non_writeable = pytest.mark.xfail(
+    tables
+    and LooseVersion(np.__version__) >= LooseVersion("1.16")
+    and LooseVersion(tables.__version__) < LooseVersion("3.5.1"),
+    reason=(
+        "gh-25511, gh-24839. pytables needs a "
+        "release beyond 3.4.4 to support numpy 1.16x"
+    ),
+)
+
+
 def _skip_if_no_mpl():
     mod = safe_import("matplotlib")
     if mod: