From 39cd70a2f18f516d9a759a5e88d80fc9821dea42 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 30 Sep 2022 21:03:05 -0700 Subject: [PATCH 1/3] TST/CLN: Remove ensure_clean_path for tmp_path fixture --- pandas/tests/io/pytables/common.py | 40 +- pandas/tests/io/pytables/test_append.py | 13 +- pandas/tests/io/pytables/test_categorical.py | 41 +- pandas/tests/io/pytables/test_compat.py | 20 +- pandas/tests/io/pytables/test_complex.py | 97 ++-- pandas/tests/io/pytables/test_errors.py | 91 ++-- .../tests/io/pytables/test_file_handling.py | 506 +++++++++--------- pandas/tests/io/pytables/test_keys.py | 31 +- pandas/tests/io/pytables/test_put.py | 63 ++- pandas/tests/io/pytables/test_read.py | 159 +++--- .../io/pytables/test_retain_attributes.py | 53 +- pandas/tests/io/pytables/test_round_trip.py | 105 ++-- pandas/tests/io/pytables/test_select.py | 138 +++-- pandas/tests/io/pytables/test_store.py | 211 ++++---- pandas/tests/io/pytables/test_subclass.py | 41 +- pandas/tests/io/pytables/test_timezones.py | 13 +- 16 files changed, 787 insertions(+), 835 deletions(-) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index 88a32e1a75972..dc02f62e42f78 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -14,14 +14,6 @@ tables.parameters.MAX_THREADS = 1 -def safe_remove(path): - if path is not None: - try: - os.remove(path) # noqa: PDF008 - except OSError: - pass - - def safe_close(store): try: if store is not None: @@ -30,11 +22,6 @@ def safe_close(store): pass -def create_tempfile(path): - """create an unopened named temporary file""" - return os.path.join(tempfile.gettempdir(), path) - - # contextmanager to ensure the file cleanup @contextmanager def ensure_clean_store( @@ -45,7 +32,7 @@ def ensure_clean_store( # put in the temporary path if we don't have one already if not len(os.path.dirname(path)): - path = create_tempfile(path) + path = 
os.path.join(tempfile.gettempdir(), path) store = HDFStore( path, mode=mode, complevel=complevel, complib=complib, fletcher32=False @@ -54,26 +41,11 @@ def ensure_clean_store( finally: safe_close(store) if mode == "w" or mode == "a": - safe_remove(path) - - -@contextmanager -def ensure_clean_path(path): - """ - return essentially a named temporary file that is not opened - and deleted on exiting; if path is a list, then create and - return list of filenames - """ - try: - if isinstance(path, list): - filenames = [create_tempfile(p) for p in path] - yield filenames - else: - filenames = [create_tempfile(path)] - yield filenames[0] - finally: - for f in filenames: - safe_remove(f) + if path is not None: + try: + os.remove(path) # noqa: PDF008 + except OSError: + pass def _maybe_remove(store, key): diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 40a50c55de2a4..c675ca55006e6 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -20,7 +20,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) @@ -637,7 +636,7 @@ def check_col(key, name, size): tm.assert_frame_equal(result, expected) -def test_append_hierarchical(setup_path, multiindex_dataframe_random_data): +def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_data): df = multiindex_dataframe_random_data df.columns.name = None @@ -651,11 +650,11 @@ def test_append_hierarchical(setup_path, multiindex_dataframe_random_data): expected = df.reindex(columns=["A", "B"]) tm.assert_frame_equal(result, expected) - with ensure_clean_path("test.hdf") as path: - df.to_hdf(path, "df", format="table") - result = read_hdf(path, "df", columns=["A", "B"]) - expected = df.reindex(columns=["A", "B"]) - tm.assert_frame_equal(result, expected) + path = tmp_path / "test.hdf" + df.to_hdf(path, "df", format="table") + result = read_hdf(path, "df", columns=["A", 
"B"]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(result, expected) def test_append_misc(setup_path): diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index 3ef685d1132b1..c70e49cfb0bde 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -11,7 +11,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) @@ -147,7 +146,7 @@ def test_categorical(setup_path): store.select("df3/meta/s/meta") -def test_categorical_conversion(setup_path): +def test_categorical_conversion(tmp_path, setup_path): # GH13322 # Check that read_hdf with categorical columns doesn't return rows if @@ -161,10 +160,10 @@ def test_categorical_conversion(setup_path): # We are expecting an empty DataFrame matching types of df expected = df.iloc[[], :] - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table", data_columns=True) - result = read_hdf(path, "df", where="obsids=B") - tm.assert_frame_equal(result, expected) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table", data_columns=True) + result = read_hdf(path, "df", where="obsids=B") + tm.assert_frame_equal(result, expected) # Test with categories df.obsids = df.obsids.astype("category") @@ -172,13 +171,13 @@ def test_categorical_conversion(setup_path): # We are expecting an empty DataFrame matching types of df expected = df.iloc[[], :] - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table", data_columns=True) - result = read_hdf(path, "df", where="obsids=B") - tm.assert_frame_equal(result, expected) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table", data_columns=True) + result = read_hdf(path, "df", where="obsids=B") + tm.assert_frame_equal(result, expected) -def test_categorical_nan_only_columns(setup_path): +def test_categorical_nan_only_columns(tmp_path, 
setup_path): # GH18413 # Check that read_hdf with categorical columns with NaN-only values can # be read back. @@ -194,10 +193,10 @@ def test_categorical_nan_only_columns(setup_path): df["b"] = df.b.astype("category") df["d"] = df.b.astype("category") expected = df - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table", data_columns=True) - result = read_hdf(path, "df") - tm.assert_frame_equal(result, expected) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table", data_columns=True) + result = read_hdf(path, "df") + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -207,7 +206,9 @@ def test_categorical_nan_only_columns(setup_path): ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})), ], ) -def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFrame): +def test_convert_value( + tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame +): # GH39420 # Check that read_hdf with categorical columns can filter by where condition. 
df.col = df.col.astype("category") @@ -216,7 +217,7 @@ def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFram expected.col = expected.col.astype("category") expected.col = expected.col.cat.set_categories(categorical_values) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table", min_itemsize=max_widths) - result = read_hdf(path, where=where) - tm.assert_frame_equal(result, expected) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table", min_itemsize=max_widths) + result = read_hdf(path, where=where) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index 5fe55fda8a452..7f71d2666c9ae 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -2,13 +2,12 @@ import pandas as pd import pandas._testing as tm -from pandas.tests.io.pytables.common import ensure_clean_path tables = pytest.importorskip("tables") @pytest.fixture -def pytables_hdf5_file(): +def pytables_hdf5_file(tmp_path): """ Use PyTables to create a simple HDF5 file. """ @@ -29,16 +28,15 @@ def pytables_hdf5_file(): objname = "pandas_test_timeseries" - with ensure_clean_path("written_with_pytables.h5") as path: - # The `ensure_clean_path` context mgr removes the temp file upon exit. 
- with tables.open_file(path, mode="w") as f: - t = f.create_table("/", name=objname, description=table_schema) - for sample in testsamples: - for key, value in sample.items(): - t.row[key] = value - t.row.append() + path = tmp_path / "written_with_pytables.h5" + with tables.open_file(path, mode="w") as f: + t = f.create_table("/", name=objname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() - yield path, objname, pd.DataFrame(testsamples) + yield path, objname, pd.DataFrame(testsamples) class TestReadPyTablesHDF5: diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py index f3a43f669b1d5..870458e93689f 100644 --- a/pandas/tests/io/pytables/test_complex.py +++ b/pandas/tests/io/pytables/test_complex.py @@ -9,48 +9,45 @@ Series, ) import pandas._testing as tm -from pandas.tests.io.pytables.common import ( - ensure_clean_path, - ensure_clean_store, -) +from pandas.tests.io.pytables.common import ensure_clean_store from pandas.io.pytables import read_hdf -def test_complex_fixed(setup_path): +def test_complex_fixed(tmp_path, setup_path): df = DataFrame( np.random.rand(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df") - reread = read_hdf(path, "df") - tm.assert_frame_equal(df, reread) + path = tmp_path / setup_path + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) df = DataFrame( np.random.rand(4, 5).astype(np.complex128), index=list("abcd"), columns=list("ABCDE"), ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df") - reread = read_hdf(path, "df") - tm.assert_frame_equal(df, reread) + path = tmp_path / setup_path + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) -def test_complex_table(setup_path): +def test_complex_table(tmp_path, setup_path): df = 
DataFrame( np.random.rand(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table") - reread = read_hdf(path, "df") - tm.assert_frame_equal(df, reread) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) df = DataFrame( np.random.rand(4, 5).astype(np.complex128), @@ -58,13 +55,13 @@ def test_complex_table(setup_path): columns=list("ABCDE"), ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table", mode="w") - reread = read_hdf(path, "df") - tm.assert_frame_equal(df, reread) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table", mode="w") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) -def test_complex_mixed_fixed(setup_path): +def test_complex_mixed_fixed(tmp_path, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 ) @@ -81,13 +78,13 @@ def test_complex_mixed_fixed(setup_path): }, index=list("abcd"), ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df") - reread = read_hdf(path, "df") - tm.assert_frame_equal(df, reread) + path = tmp_path / setup_path + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) -def test_complex_mixed_table(setup_path): +def test_complex_mixed_table(tmp_path, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 ) @@ -110,13 +107,13 @@ def test_complex_mixed_table(setup_path): result = store.select("df", where="A>2") tm.assert_frame_equal(df.loc[df.A > 2], result) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table") - reread = read_hdf(path, "df") - tm.assert_frame_equal(df, reread) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") + 
tm.assert_frame_equal(df, reread) -def test_complex_across_dimensions_fixed(setup_path): +def test_complex_across_dimensions_fixed(tmp_path, setup_path): with catch_warnings(record=True): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list("abcd")) @@ -125,13 +122,13 @@ def test_complex_across_dimensions_fixed(setup_path): objs = [s, df] comps = [tm.assert_series_equal, tm.assert_frame_equal] for obj, comp in zip(objs, comps): - with ensure_clean_path(setup_path) as path: - obj.to_hdf(path, "obj", format="fixed") - reread = read_hdf(path, "obj") - comp(obj, reread) + path = tmp_path / setup_path + obj.to_hdf(path, "obj", format="fixed") + reread = read_hdf(path, "obj") + comp(obj, reread) -def test_complex_across_dimensions(setup_path): +def test_complex_across_dimensions(tmp_path, setup_path): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list("abcd")) df = DataFrame({"A": s, "B": s}) @@ -141,10 +138,10 @@ def test_complex_across_dimensions(setup_path): objs = [df] comps = [tm.assert_frame_equal] for obj, comp in zip(objs, comps): - with ensure_clean_path(setup_path) as path: - obj.to_hdf(path, "obj", format="table") - reread = read_hdf(path, "obj") - comp(obj, reread) + path = tmp_path / setup_path + obj.to_hdf(path, "obj", format="table") + reread = read_hdf(path, "obj") + comp(obj, reread) def test_complex_indexing_error(setup_path): @@ -169,7 +166,7 @@ def test_complex_indexing_error(setup_path): store.append("df", df, data_columns=["C"]) -def test_complex_series_error(setup_path): +def test_complex_series_error(tmp_path, setup_path): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list("abcd")) @@ -181,14 +178,14 @@ def test_complex_series_error(setup_path): "values to data_columns when initializing the table." 
) - with ensure_clean_path(setup_path) as path: - with pytest.raises(TypeError, match=msg): - s.to_hdf(path, "obj", format="t") + path = tmp_path / setup_path + with pytest.raises(TypeError, match=msg): + s.to_hdf(path, "obj", format="t") - with ensure_clean_path(setup_path) as path: - s.to_hdf(path, "obj", format="t", index=False) - reread = read_hdf(path, "obj") - tm.assert_series_equal(s, reread) + path = tmp_path / setup_path + s.to_hdf(path, "obj", format="t", index=False) + reread = read_hdf(path, "obj") + tm.assert_series_equal(s, reread) def test_complex_append(setup_path): diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index f31c185af0497..7e590df95f952 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -15,10 +15,7 @@ date_range, read_hdf, ) -from pandas.tests.io.pytables.common import ( - ensure_clean_path, - ensure_clean_store, -) +from pandas.tests.io.pytables.common import ensure_clean_store from pandas.io.pytables import ( Term, @@ -90,7 +87,7 @@ def test_unimplemented_dtypes_table_columns(setup_path): store.append("df_unimplemented", df) -def test_invalid_terms(setup_path): +def test_invalid_terms(tmp_path, setup_path): with ensure_clean_store(setup_path) as store: @@ -122,36 +119,36 @@ def test_invalid_terms(setup_path): store.select("df", "index>") # from the docs - with ensure_clean_path(setup_path) as path: - dfq = DataFrame( - np.random.randn(10, 4), - columns=list("ABCD"), - index=date_range("20130101", periods=10), - ) - dfq.to_hdf(path, "dfq", format="table", data_columns=True) + path = tmp_path / setup_path + dfq = DataFrame( + np.random.randn(10, 4), + columns=list("ABCD"), + index=date_range("20130101", periods=10), + ) + dfq.to_hdf(path, "dfq", format="table", data_columns=True) - # check ok - read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']") - read_hdf(path, "dfq", where="A>0 or C>0") + # check ok + 
read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']") + read_hdf(path, "dfq", where="A>0 or C>0") # catch the invalid reference - with ensure_clean_path(setup_path) as path: - dfq = DataFrame( - np.random.randn(10, 4), - columns=list("ABCD"), - index=date_range("20130101", periods=10), - ) - dfq.to_hdf(path, "dfq", format="table") - - msg = ( - r"The passed where expression: A>0 or C>0\n\s*" - r"contains an invalid variable reference\n\s*" - r"all of the variable references must be a reference to\n\s*" - r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*" - r"The currently defined references are: index,columns\n" - ) - with pytest.raises(ValueError, match=msg): - read_hdf(path, "dfq", where="A>0 or C>0") + path = tmp_path / setup_path + dfq = DataFrame( + np.random.randn(10, 4), + columns=list("ABCD"), + index=date_range("20130101", periods=10), + ) + dfq.to_hdf(path, "dfq", format="table") + + msg = ( + r"The passed where expression: A>0 or C>0\n\s*" + r"contains an invalid variable reference\n\s*" + r"all of the variable references must be a reference to\n\s*" + r"an axis \(e.g. 
'index' or 'columns'\), or a data_column\n\s*" + r"The currently defined references are: index,columns\n" + ) + with pytest.raises(ValueError, match=msg): + read_hdf(path, "dfq", where="A>0 or C>0") def test_append_with_diff_col_name_types_raises_value_error(setup_path): @@ -188,13 +185,13 @@ def test_invalid_complib(setup_path): CategoricalIndex(list("abc")), ], ) -def test_to_hdf_multiindex_extension_dtype(idx, setup_path): +def test_to_hdf_multiindex_extension_dtype(idx, tmp_path, setup_path): # GH 7775 mi = MultiIndex.from_arrays([idx, idx]) df = DataFrame(0, index=mi, columns=["a"]) - with ensure_clean_path(setup_path) as path: - with pytest.raises(NotImplementedError, match="Saving a MultiIndex"): - df.to_hdf(path, "df") + path = tmp_path / setup_path + with pytest.raises(NotImplementedError, match="Saving a MultiIndex"): + df.to_hdf(path, "df") def test_unsuppored_hdf_file_error(datapath): @@ -209,21 +206,21 @@ def test_unsuppored_hdf_file_error(datapath): read_hdf(data_path) -def test_read_hdf_errors(setup_path): +def test_read_hdf_errors(setup_path, tmp_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) - with ensure_clean_path(setup_path) as path: - msg = r"File [\S]* does not exist" - with pytest.raises(OSError, match=msg): - read_hdf(path, "key") + path = tmp_path / setup_path + msg = r"File [\S]* does not exist" + with pytest.raises(OSError, match=msg): + read_hdf(path, "key") - df.to_hdf(path, "df") - store = HDFStore(path, mode="r") - store.close() + df.to_hdf(path, "df") + store = HDFStore(path, mode="r") + store.close() - msg = "The HDFStore must be open for reading." - with pytest.raises(OSError, match=msg): - read_hdf(store, "df") + msg = "The HDFStore must be open for reading." 
+ with pytest.raises(OSError, match=msg): + read_hdf(store, "df") def test_read_hdf_generic_buffer_errors(): diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index 13b6b94dda8d4..9b20820e355a6 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -18,7 +18,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, tables, ) @@ -30,116 +29,116 @@ @pytest.mark.parametrize("mode", ["r", "r+", "a", "w"]) -def test_mode(setup_path, mode): +def test_mode(setup_path, tmp_path, mode): df = tm.makeTimeDataFrame() msg = r"[\S]* does not exist" - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - # constructor - if mode in ["r", "r+"]: - with pytest.raises(OSError, match=msg): - HDFStore(path, mode=mode) + # constructor + if mode in ["r", "r+"]: + with pytest.raises(OSError, match=msg): + HDFStore(path, mode=mode) - else: - store = HDFStore(path, mode=mode) - assert store._handle.mode == mode - store.close() + else: + store = HDFStore(path, mode=mode) + assert store._handle.mode == mode + store.close() - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - # context - if mode in ["r", "r+"]: - with pytest.raises(OSError, match=msg): - with HDFStore(path, mode=mode) as store: - pass - else: + # context + if mode in ["r", "r+"]: + with pytest.raises(OSError, match=msg): with HDFStore(path, mode=mode) as store: - assert store._handle.mode == mode + pass + else: + with HDFStore(path, mode=mode) as store: + assert store._handle.mode == mode - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - # conv write - if mode in ["r", "r+"]: - with pytest.raises(OSError, match=msg): - df.to_hdf(path, "df", mode=mode) - df.to_hdf(path, "df", mode="w") - else: + # conv write + if mode in ["r", "r+"]: + with pytest.raises(OSError, match=msg): 
df.to_hdf(path, "df", mode=mode) + df.to_hdf(path, "df", mode="w") + else: + df.to_hdf(path, "df", mode=mode) - # conv read - if mode in ["w"]: - msg = ( - "mode w is not allowed while performing a read. " - r"Allowed modes are r, r\+ and a." - ) - with pytest.raises(ValueError, match=msg): - read_hdf(path, "df", mode=mode) - else: - result = read_hdf(path, "df", mode=mode) - tm.assert_frame_equal(result, df) + # conv read + if mode in ["w"]: + msg = ( + "mode w is not allowed while performing a read. " + r"Allowed modes are r, r\+ and a." + ) + with pytest.raises(ValueError, match=msg): + read_hdf(path, "df", mode=mode) + else: + result = read_hdf(path, "df", mode=mode) + tm.assert_frame_equal(result, df) -def test_default_mode(setup_path): +def test_default_mode(tmp_path, setup_path): # read_hdf uses default mode df = tm.makeTimeDataFrame() - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", mode="w") - result = read_hdf(path, "df") - tm.assert_frame_equal(result, df) - + path = tmp_path / setup_path + df.to_hdf(path, "df", mode="w") + result = read_hdf(path, "df") + tm.assert_frame_equal(result, df) -def test_reopen_handle(setup_path): - with ensure_clean_path(setup_path) as path: - - store = HDFStore(path, mode="a") - store["a"] = tm.makeTimeSeries() +def test_reopen_handle(tmp_path, setup_path): - msg = ( - r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the " - "current file!" - ) - # invalid mode change - with pytest.raises(PossibleDataLossError, match=msg): - store.open("w") + path = tmp_path / setup_path - store.close() - assert not store.is_open + store = HDFStore(path, mode="a") + store["a"] = tm.makeTimeSeries() - # truncation ok here + msg = ( + r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the " + "current file!" 
+ ) + # invalid mode change + with pytest.raises(PossibleDataLossError, match=msg): store.open("w") - assert store.is_open - assert len(store) == 0 - store.close() - assert not store.is_open - - store = HDFStore(path, mode="a") - store["a"] = tm.makeTimeSeries() - - # reopen as read - store.open("r") - assert store.is_open - assert len(store) == 1 - assert store._mode == "r" - store.close() - assert not store.is_open - - # reopen as append - store.open("a") - assert store.is_open - assert len(store) == 1 - assert store._mode == "a" - store.close() - assert not store.is_open - # reopen as append (again) - store.open("a") - assert store.is_open - assert len(store) == 1 - assert store._mode == "a" - store.close() - assert not store.is_open + store.close() + assert not store.is_open + + # truncation ok here + store.open("w") + assert store.is_open + assert len(store) == 0 + store.close() + assert not store.is_open + + store = HDFStore(path, mode="a") + store["a"] = tm.makeTimeSeries() + + # reopen as read + store.open("r") + assert store.is_open + assert len(store) == 1 + assert store._mode == "r" + store.close() + assert not store.is_open + + # reopen as append + store.open("a") + assert store.is_open + assert len(store) == 1 + assert store._mode == "a" + store.close() + assert not store.is_open + + # reopen as append (again) + store.open("a") + assert store.is_open + assert len(store) == 1 + assert store._mode == "a" + store.close() + assert not store.is_open def test_open_args(setup_path): @@ -172,61 +171,64 @@ def test_flush(setup_path): store.flush(fsync=True) -def test_complibs_default_settings(setup_path): +def test_complibs_default_settings(tmp_path, setup_path): # GH15943 df = tm.makeDataFrame() # Set complevel and check if complib is automatically set to # default value - with ensure_clean_path(setup_path) as tmpfile: - df.to_hdf(tmpfile, "df", complevel=9) - result = read_hdf(tmpfile, "df") - tm.assert_frame_equal(result, df) + tmpfile = tmp_path / 
setup_path + df.to_hdf(tmpfile, "df", complevel=9) + result = read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) - with tables.open_file(tmpfile, mode="r") as h5file: - for node in h5file.walk_nodes(where="/df", classname="Leaf"): - assert node.filters.complevel == 9 - assert node.filters.complib == "zlib" + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 9 + assert node.filters.complib == "zlib" # Set complib and check to see if compression is disabled - with ensure_clean_path(setup_path) as tmpfile: - df.to_hdf(tmpfile, "df", complib="zlib") - result = read_hdf(tmpfile, "df") - tm.assert_frame_equal(result, df) + tmpfile = tmp_path / setup_path + df.to_hdf(tmpfile, "df", complib="zlib") + result = read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) - with tables.open_file(tmpfile, mode="r") as h5file: - for node in h5file.walk_nodes(where="/df", classname="Leaf"): - assert node.filters.complevel == 0 - assert node.filters.complib is None + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None # Check if not setting complib or complevel results in no compression - with ensure_clean_path(setup_path) as tmpfile: - df.to_hdf(tmpfile, "df") - result = read_hdf(tmpfile, "df") - tm.assert_frame_equal(result, df) + tmpfile = tmp_path / setup_path + df.to_hdf(tmpfile, "df") + result = read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) - with tables.open_file(tmpfile, mode="r") as h5file: - for node in h5file.walk_nodes(where="/df", classname="Leaf"): - assert node.filters.complevel == 0 - assert node.filters.complib is None - - # Check if file-defaults can be overridden on a per table basis - with ensure_clean_path(setup_path) as tmpfile: - store = HDFStore(tmpfile) - store.append("dfc", df, complevel=9, 
complib="blosc") - store.append("df", df) - store.close() - - with tables.open_file(tmpfile, mode="r") as h5file: - for node in h5file.walk_nodes(where="/df", classname="Leaf"): - assert node.filters.complevel == 0 - assert node.filters.complib is None - for node in h5file.walk_nodes(where="/dfc", classname="Leaf"): - assert node.filters.complevel == 9 - assert node.filters.complib == "blosc" + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None -def test_complibs(setup_path): +def test_complibs_default_settings_override(tmp_path, setup_path): + # Check if file-defaults can be overridden on a per table basis + df = tm.makeDataFrame() + tmpfile = tmp_path / setup_path + store = HDFStore(tmpfile) + store.append("dfc", df, complevel=9, complib="blosc") + store.append("df", df) + store.close() + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None + for node in h5file.walk_nodes(where="/dfc", classname="Leaf"): + assert node.filters.complevel == 9 + assert node.filters.complib == "blosc" + + +def test_complibs(tmp_path, setup_path): # GH14478 df = tm.makeDataFrame() @@ -243,24 +245,24 @@ def test_complibs(setup_path): all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] for (lib, lvl) in all_tests: - with ensure_clean_path(setup_path) as tmpfile: - gname = "foo" - - # Write and read file to see if data is consistent - df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl) - result = read_hdf(tmpfile, gname) - tm.assert_frame_equal(result, df) - - # Open file and check metadata - # for correct amount of compression - h5table = tables.open_file(tmpfile, mode="r") - for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"): - assert node.filters.complevel == lvl - if lvl 
== 0: - assert node.filters.complib is None - else: - assert node.filters.complib == lib - h5table.close() + tmpfile = tmp_path / setup_path + gname = "foo" + + # Write and read file to see if data is consistent + df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl) + result = read_hdf(tmpfile, gname) + tm.assert_frame_equal(result, df) + + # Open file and check metadata + # for correct amount of compression + h5table = tables.open_file(tmpfile, mode="r") + for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"): + assert node.filters.complevel == lvl + if lvl == 0: + assert node.filters.complib is None + else: + assert node.filters.complib == lib + h5table.close() @pytest.mark.skipif( @@ -296,7 +298,7 @@ def test_encoding(setup_path): ], ) @pytest.mark.parametrize("dtype", ["category", object]) -def test_latin_encoding(setup_path, dtype, val): +def test_latin_encoding(tmp_path, setup_path, dtype, val): enc = "latin-1" nan_rep = "" key = "data" @@ -304,141 +306,141 @@ def test_latin_encoding(setup_path, dtype, val): val = [x.decode(enc) if isinstance(x, bytes) else x for x in val] ser = Series(val, dtype=dtype) - with ensure_clean_path(setup_path) as store: - ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep) - retr = read_hdf(store, key) + store = tmp_path / setup_path + ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep) + retr = read_hdf(store, key) s_nan = ser.replace(nan_rep, np.nan) tm.assert_series_equal(s_nan, retr) -def test_multiple_open_close(setup_path): +def test_multiple_open_close(tmp_path, setup_path): # gh-4409: open & close multiple times - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - df = tm.makeDataFrame() - df.to_hdf(path, "df", mode="w", format="table") + df = tm.makeDataFrame() + df.to_hdf(path, "df", mode="w", format="table") + + # single + store = HDFStore(path) + assert "CLOSED" not in store.info() + assert store.is_open - # single - store = HDFStore(path) - 
assert "CLOSED" not in store.info() - assert store.is_open + store.close() + assert "CLOSED" in store.info() + assert not store.is_open + path = tmp_path / setup_path + + if pytables._table_file_open_policy_is_strict: + # multiples + store1 = HDFStore(path) + msg = ( + r"The file [\S]* is already opened\. Please close it before " + r"reopening in write mode\." + ) + with pytest.raises(ValueError, match=msg): + HDFStore(path) + + store1.close() + else: + + # multiples + store1 = HDFStore(path) + store2 = HDFStore(path) + + assert "CLOSED" not in store1.info() + assert "CLOSED" not in store2.info() + assert store1.is_open + assert store2.is_open + + store1.close() + assert "CLOSED" in store1.info() + assert not store1.is_open + assert "CLOSED" not in store2.info() + assert store2.is_open + + store2.close() + assert "CLOSED" in store1.info() + assert "CLOSED" in store2.info() + assert not store1.is_open + assert not store2.is_open + + # nested close + store = HDFStore(path, mode="w") + store.append("df", df) + + store2 = HDFStore(path) + store2.append("df2", df) + store2.close() + assert "CLOSED" in store2.info() + assert not store2.is_open + + store.close() + assert "CLOSED" in store.info() + assert not store.is_open + + # double closing + store = HDFStore(path, mode="w") + store.append("df", df) + + store2 = HDFStore(path) store.close() assert "CLOSED" in store.info() assert not store.is_open - with ensure_clean_path(setup_path) as path: - - if pytables._table_file_open_policy_is_strict: - # multiples - store1 = HDFStore(path) - msg = ( - r"The file [\S]* is already opened\. Please close it before " - r"reopening in write mode\." 
- ) - with pytest.raises(ValueError, match=msg): - HDFStore(path) - - store1.close() - else: - - # multiples - store1 = HDFStore(path) - store2 = HDFStore(path) - - assert "CLOSED" not in store1.info() - assert "CLOSED" not in store2.info() - assert store1.is_open - assert store2.is_open - - store1.close() - assert "CLOSED" in store1.info() - assert not store1.is_open - assert "CLOSED" not in store2.info() - assert store2.is_open - - store2.close() - assert "CLOSED" in store1.info() - assert "CLOSED" in store2.info() - assert not store1.is_open - assert not store2.is_open - - # nested close - store = HDFStore(path, mode="w") - store.append("df", df) - - store2 = HDFStore(path) - store2.append("df2", df) - store2.close() - assert "CLOSED" in store2.info() - assert not store2.is_open - - store.close() - assert "CLOSED" in store.info() - assert not store.is_open - - # double closing - store = HDFStore(path, mode="w") - store.append("df", df) - - store2 = HDFStore(path) - store.close() - assert "CLOSED" in store.info() - assert not store.is_open - - store2.close() - assert "CLOSED" in store2.info() - assert not store2.is_open + store2.close() + assert "CLOSED" in store2.info() + assert not store2.is_open # ops on a closed store - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - df = tm.makeDataFrame() - df.to_hdf(path, "df", mode="w", format="table") + df = tm.makeDataFrame() + df.to_hdf(path, "df", mode="w", format="table") - store = HDFStore(path) - store.close() + store = HDFStore(path) + store.close() - msg = r"[\S]* file is not open!" - with pytest.raises(ClosedFileError, match=msg): - store.keys() + msg = r"[\S]* file is not open!" 
+ with pytest.raises(ClosedFileError, match=msg): + store.keys() - with pytest.raises(ClosedFileError, match=msg): - "df" in store + with pytest.raises(ClosedFileError, match=msg): + "df" in store - with pytest.raises(ClosedFileError, match=msg): - len(store) + with pytest.raises(ClosedFileError, match=msg): + len(store) - with pytest.raises(ClosedFileError, match=msg): - store["df"] + with pytest.raises(ClosedFileError, match=msg): + store["df"] - with pytest.raises(ClosedFileError, match=msg): - store.select("df") + with pytest.raises(ClosedFileError, match=msg): + store.select("df") - with pytest.raises(ClosedFileError, match=msg): - store.get("df") + with pytest.raises(ClosedFileError, match=msg): + store.get("df") - with pytest.raises(ClosedFileError, match=msg): - store.append("df2", df) + with pytest.raises(ClosedFileError, match=msg): + store.append("df2", df) - with pytest.raises(ClosedFileError, match=msg): - store.put("df3", df) + with pytest.raises(ClosedFileError, match=msg): + store.put("df3", df) - with pytest.raises(ClosedFileError, match=msg): - store.get_storer("df2") + with pytest.raises(ClosedFileError, match=msg): + store.get_storer("df2") - with pytest.raises(ClosedFileError, match=msg): - store.remove("df2") + with pytest.raises(ClosedFileError, match=msg): + store.remove("df2") - with pytest.raises(ClosedFileError, match=msg): - store.select("df") + with pytest.raises(ClosedFileError, match=msg): + store.select("df") - msg = "'HDFStore' object has no attribute 'df'" - with pytest.raises(AttributeError, match=msg): - store.df + msg = "'HDFStore' object has no attribute 'df'" + with pytest.raises(AttributeError, match=msg): + store.df def test_fspath(): diff --git a/pandas/tests/io/pytables/test_keys.py b/pandas/tests/io/pytables/test_keys.py index cf412a242b000..dff7e2144d3c2 100644 --- a/pandas/tests/io/pytables/test_keys.py +++ b/pandas/tests/io/pytables/test_keys.py @@ -6,7 +6,6 @@ _testing as tm, ) from pandas.tests.io.pytables.common 
import ( - ensure_clean_path, ensure_clean_store, tables, ) @@ -27,7 +26,7 @@ def test_keys(setup_path): assert set(store) == expected -def test_non_pandas_keys(setup_path): +def test_non_pandas_keys(tmp_path, setup_path): class Table1(tables.IsDescription): value1 = tables.Float32Col() @@ -37,20 +36,20 @@ class Table2(tables.IsDescription): class Table3(tables.IsDescription): value3 = tables.Float32Col() - with ensure_clean_path(setup_path) as path: - with tables.open_file(path, mode="w") as h5file: - group = h5file.create_group("/", "group") - h5file.create_table(group, "table1", Table1, "Table 1") - h5file.create_table(group, "table2", Table2, "Table 2") - h5file.create_table(group, "table3", Table3, "Table 3") - with HDFStore(path) as store: - assert len(store.keys(include="native")) == 3 - expected = {"/group/table1", "/group/table2", "/group/table3"} - assert set(store.keys(include="native")) == expected - assert set(store.keys(include="pandas")) == set() - for name in expected: - df = store.get(name) - assert len(df.columns) == 1 + path = tmp_path / setup_path + with tables.open_file(path, mode="w") as h5file: + group = h5file.create_group("/", "group") + h5file.create_table(group, "table1", Table1, "Table 1") + h5file.create_table(group, "table2", Table2, "Table 2") + h5file.create_table(group, "table3", Table3, "Table 3") + with HDFStore(path) as store: + assert len(store.keys(include="native")) == 3 + expected = {"/group/table1", "/group/table2", "/group/table3"} + assert set(store.keys(include="native")) == expected + assert set(store.keys(include="pandas")) == set() + for name in expected: + df = store.get(name) + assert len(df.columns) == 1 def test_keys_illegal_include_keyword_value(setup_path): diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 4b3fcf4e96cad..2699d33950412 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -24,7 +24,6 @@ from pandas.core.api 
import Int64Index from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) from pandas.util import _test_decorators as td @@ -32,28 +31,26 @@ pytestmark = pytest.mark.single_cpu -def test_format_type(setup_path): +def test_format_type(tmp_path, setup_path): df = DataFrame({"A": [1, 2]}) - with ensure_clean_path(setup_path) as path: - with HDFStore(path) as store: - store.put("a", df, format="fixed") - store.put("b", df, format="table") + with HDFStore(tmp_path / setup_path) as store: + store.put("a", df, format="fixed") + store.put("b", df, format="table") - assert store.get_storer("a").format_type == "fixed" - assert store.get_storer("b").format_type == "table" + assert store.get_storer("a").format_type == "fixed" + assert store.get_storer("b").format_type == "table" -def test_format_kwarg_in_constructor(setup_path): +def test_format_kwarg_in_constructor(tmp_path, setup_path): # GH 13291 msg = "format is not a defined argument for HDFStore" - with tm.ensure_clean(setup_path) as path: - with pytest.raises(ValueError, match=msg): - HDFStore(path, format="table") + with pytest.raises(ValueError, match=msg): + HDFStore(tmp_path / setup_path, format="table") -def test_api_default_format(setup_path): +def test_api_default_format(tmp_path, setup_path): # default_format option with ensure_clean_store(setup_path) as store: @@ -77,23 +74,23 @@ def test_api_default_format(setup_path): store.append("df2", df) assert store.get_storer("df").is_table - with ensure_clean_path(setup_path) as path: - df = tm.makeDataFrame() + path = tmp_path / setup_path + df = tm.makeDataFrame() - with pd.option_context("io.hdf.default_format", "fixed"): - df.to_hdf(path, "df") - with HDFStore(path) as store: - assert not store.get_storer("df").is_table - with pytest.raises(ValueError, match=msg): - df.to_hdf(path, "df2", append=True) + with pd.option_context("io.hdf.default_format", "fixed"): + df.to_hdf(path, "df") + with HDFStore(path) as store: + 
assert not store.get_storer("df").is_table + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df2", append=True) - with pd.option_context("io.hdf.default_format", "table"): - df.to_hdf(path, "df3") - with HDFStore(path) as store: - assert store.get_storer("df3").is_table - df.to_hdf(path, "df4", append=True) - with HDFStore(path) as store: - assert store.get_storer("df4").is_table + with pd.option_context("io.hdf.default_format", "table"): + df.to_hdf(path, "df3") + with HDFStore(path) as store: + assert store.get_storer("df3").is_table + df.to_hdf(path, "df4", append=True) + with HDFStore(path) as store: + assert store.get_storer("df4").is_table def test_put(setup_path): @@ -368,14 +365,14 @@ def make_index(names=None): @pytest.mark.parametrize("format", ["fixed", "table"]) -def test_store_periodindex(setup_path, format): +def test_store_periodindex(tmp_path, setup_path, format): # GH 7796 # test of PeriodIndex in HDFStore df = DataFrame( np.random.randn(5, 1), index=pd.period_range("20220101", freq="M", periods=5) ) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", mode="w", format=format) - expected = pd.read_hdf(path, "df") - tm.assert_frame_equal(df, expected) + path = tmp_path / setup_path + df.to_hdf(path, "df", mode="w", format=format) + expected = pd.read_hdf(path, "df") + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index 4b57bc8291442..1163b9e11a367 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -18,7 +18,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) from pandas.util import _test_decorators as td @@ -28,34 +27,34 @@ pytestmark = pytest.mark.single_cpu -def test_read_missing_key_close_store(setup_path): +def test_read_missing_key_close_store(tmp_path, setup_path): # GH 25766 - with ensure_clean_path(setup_path) as path: - df = 
DataFrame({"a": range(2), "b": range(2)}) - df.to_hdf(path, "k1") + path = tmp_path / setup_path + df = DataFrame({"a": range(2), "b": range(2)}) + df.to_hdf(path, "k1") - with pytest.raises(KeyError, match="'No object named k2 in the file'"): - read_hdf(path, "k2") + with pytest.raises(KeyError, match="'No object named k2 in the file'"): + read_hdf(path, "k2") - # smoke test to test that file is properly closed after - # read with KeyError before another write - df.to_hdf(path, "k2") + # smoke test to test that file is properly closed after + # read with KeyError before another write + df.to_hdf(path, "k2") -def test_read_missing_key_opened_store(setup_path): +def test_read_missing_key_opened_store(tmp_path, setup_path): # GH 28699 - with ensure_clean_path(setup_path) as path: - df = DataFrame({"a": range(2), "b": range(2)}) - df.to_hdf(path, "k1") + path = tmp_path / setup_path + df = DataFrame({"a": range(2), "b": range(2)}) + df.to_hdf(path, "k1") - with HDFStore(path, "r") as store: + with HDFStore(path, "r") as store: - with pytest.raises(KeyError, match="'No object named k2 in the file'"): - read_hdf(store, "k2") + with pytest.raises(KeyError, match="'No object named k2 in the file'"): + read_hdf(store, "k2") - # Test that the file is still open after a KeyError and that we can - # still read from it. - read_hdf(store, "k1") + # Test that the file is still open after a KeyError and that we can + # still read from it. 
+ read_hdf(store, "k1") def test_read_column(setup_path): @@ -198,99 +197,99 @@ def test_legacy_table_read_py2(datapath): tm.assert_frame_equal(expected, result) -def test_read_hdf_open_store(setup_path): +def test_read_hdf_open_store(tmp_path, setup_path): # GH10330 # No check for non-string path_or-buf, and no test of open store df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) df.index.name = "letters" df = df.set_index(keys="E", append=True) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", mode="w") - direct = read_hdf(path, "df") - store = HDFStore(path, mode="r") - indirect = read_hdf(store, "df") - tm.assert_frame_equal(direct, indirect) - assert store.is_open - store.close() + path = tmp_path / setup_path + df.to_hdf(path, "df", mode="w") + direct = read_hdf(path, "df") + store = HDFStore(path, mode="r") + indirect = read_hdf(store, "df") + tm.assert_frame_equal(direct, indirect) + assert store.is_open + store.close() -def test_read_hdf_iterator(setup_path): +def test_read_hdf_iterator(tmp_path, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) df.index.name = "letters" df = df.set_index(keys="E", append=True) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", mode="w", format="t") - direct = read_hdf(path, "df") - iterator = read_hdf(path, "df", iterator=True) - assert isinstance(iterator, TableIterator) - indirect = next(iterator.__iter__()) - tm.assert_frame_equal(direct, indirect) - iterator.store.close() + path = tmp_path / setup_path + df.to_hdf(path, "df", mode="w", format="t") + direct = read_hdf(path, "df") + iterator = read_hdf(path, "df", iterator=True) + assert isinstance(iterator, TableIterator) + indirect = next(iterator.__iter__()) + tm.assert_frame_equal(direct, indirect) + iterator.store.close() -def test_read_nokey(setup_path): +def test_read_nokey(tmp_path, setup_path): # GH10443 df = DataFrame(np.random.rand(4, 5), 
index=list("abcd"), columns=list("ABCDE")) # Categorical dtype not supported for "fixed" format. So no need # to test with that dtype in the dataframe here. - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", mode="a") - reread = read_hdf(path) - tm.assert_frame_equal(df, reread) - df.to_hdf(path, "df2", mode="a") + path = tmp_path / setup_path + df.to_hdf(path, "df", mode="a") + reread = read_hdf(path) + tm.assert_frame_equal(df, reread) + df.to_hdf(path, "df2", mode="a") - msg = "key must be provided when HDF5 file contains multiple datasets." - with pytest.raises(ValueError, match=msg): - read_hdf(path) + msg = "key must be provided when HDF5 file contains multiple datasets." + with pytest.raises(ValueError, match=msg): + read_hdf(path) -def test_read_nokey_table(setup_path): +def test_read_nokey_table(tmp_path, setup_path): # GH13231 df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")}) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", mode="a", format="table") - reread = read_hdf(path) - tm.assert_frame_equal(df, reread) - df.to_hdf(path, "df2", mode="a", format="table") + path = tmp_path / setup_path + df.to_hdf(path, "df", mode="a", format="table") + reread = read_hdf(path) + tm.assert_frame_equal(df, reread) + df.to_hdf(path, "df2", mode="a", format="table") - msg = "key must be provided when HDF5 file contains multiple datasets." - with pytest.raises(ValueError, match=msg): - read_hdf(path) + msg = "key must be provided when HDF5 file contains multiple datasets." + with pytest.raises(ValueError, match=msg): + read_hdf(path) -def test_read_nokey_empty(setup_path): - with ensure_clean_path(setup_path) as path: - store = HDFStore(path) - store.close() - msg = re.escape( - "Dataset(s) incompatible with Pandas data types, not table, or no " - "datasets found in HDF5 file." 
- ) - with pytest.raises(ValueError, match=msg): - read_hdf(path) +def test_read_nokey_empty(tmp_path, setup_path): + path = tmp_path / setup_path + store = HDFStore(path) + store.close() + msg = re.escape( + "Dataset(s) incompatible with Pandas data types, not table, or no " + "datasets found in HDF5 file." + ) + with pytest.raises(ValueError, match=msg): + read_hdf(path) -def test_read_from_pathlib_path(setup_path): +def test_read_from_pathlib_path(tmp_path, setup_path): # GH11773 expected = DataFrame( np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) - with ensure_clean_path(setup_path) as filename: - path_obj = Path(filename) + filename = tmp_path / setup_path + path_obj = Path(filename) - expected.to_hdf(path_obj, "df", mode="a") - actual = read_hdf(path_obj, "df") + expected.to_hdf(path_obj, "df", mode="a") + actual = read_hdf(path_obj, "df") tm.assert_frame_equal(expected, actual) @td.skip_if_no("py.path") -def test_read_from_py_localpath(setup_path): +def test_read_from_py_localpath(tmp_path, setup_path): # GH11773 from py.path import local as LocalPath @@ -298,24 +297,24 @@ def test_read_from_py_localpath(setup_path): expected = DataFrame( np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) - with ensure_clean_path(setup_path) as filename: - path_obj = LocalPath(filename) + filename = tmp_path / setup_path + path_obj = LocalPath(filename) - expected.to_hdf(path_obj, "df", mode="a") - actual = read_hdf(path_obj, "df") + expected.to_hdf(path_obj, "df", mode="a") + actual = read_hdf(path_obj, "df") tm.assert_frame_equal(expected, actual) @pytest.mark.parametrize("format", ["fixed", "table"]) -def test_read_hdf_series_mode_r(format, setup_path): +def test_read_hdf_series_mode_r(tmp_path, format, setup_path): # GH 16583 # Tests that reading a Series saved to an HDF file # still works if a mode='r' argument is supplied series = tm.makeFloatSeries() - with ensure_clean_path(setup_path) as path: - series.to_hdf(path, key="data", 
format=format) - result = read_hdf(path, key="data", mode="r") + path = tmp_path / setup_path + series.to_hdf(path, key="data", format=format) + result = read_hdf(path, key="data", mode="r") tm.assert_series_equal(result, series) diff --git a/pandas/tests/io/pytables/test_retain_attributes.py b/pandas/tests/io/pytables/test_retain_attributes.py index bd02121bd3252..4a2bfee5dc2dc 100644 --- a/pandas/tests/io/pytables/test_retain_attributes.py +++ b/pandas/tests/io/pytables/test_retain_attributes.py @@ -13,7 +13,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) @@ -77,41 +76,33 @@ def test_retain_index_attributes(setup_path): @pytest.mark.filterwarnings( "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" ) -def test_retain_index_attributes2(setup_path): - with ensure_clean_path(setup_path) as path: +def test_retain_index_attributes2(tmp_path, setup_path): + path = tmp_path / setup_path - with catch_warnings(record=True): + with catch_warnings(record=True): - df = DataFrame( - { - "A": Series( - range(3), index=date_range("2000-1-1", periods=3, freq="H") - ) - } - ) - df.to_hdf(path, "data", mode="w", append=True) - df2 = DataFrame( - { - "A": Series( - range(3), index=date_range("2002-1-1", periods=3, freq="D") - ) - } - ) + df = DataFrame( + {"A": Series(range(3), index=date_range("2000-1-1", periods=3, freq="H"))} + ) + df.to_hdf(path, "data", mode="w", append=True) + df2 = DataFrame( + {"A": Series(range(3), index=date_range("2002-1-1", periods=3, freq="D"))} + ) - df2.to_hdf(path, "data", append=True) + df2.to_hdf(path, "data", append=True) - idx = date_range("2000-1-1", periods=3, freq="H") - idx.name = "foo" - df = DataFrame({"A": Series(range(3), index=idx)}) - df.to_hdf(path, "data", mode="w", append=True) + idx = date_range("2000-1-1", periods=3, freq="H") + idx.name = "foo" + df = DataFrame({"A": Series(range(3), index=idx)}) + df.to_hdf(path, "data", mode="w", append=True) - assert 
read_hdf(path, "data").index.name == "foo" + assert read_hdf(path, "data").index.name == "foo" - with catch_warnings(record=True): + with catch_warnings(record=True): - idx2 = date_range("2001-1-1", periods=3, freq="H") - idx2.name = "bar" - df2 = DataFrame({"A": Series(range(3), index=idx2)}) - df2.to_hdf(path, "data", append=True) + idx2 = date_range("2001-1-1", periods=3, freq="H") + idx2.name = "bar" + df2 = DataFrame({"A": Series(range(3), index=idx2)}) + df2.to_hdf(path, "data", append=True) - assert read_hdf(path, "data").index.name is None + assert read_hdf(path, "data").index.name is None diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 556272b913ff0..ce71e9e990364 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -22,7 +22,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) from pandas.util import _test_decorators as td @@ -70,48 +69,52 @@ def test_long_strings(setup_path): tm.assert_frame_equal(df, result) -def test_api(setup_path): +def test_api(tmp_path, setup_path): # GH4584 # API issue when to_hdf doesn't accept append AND format args - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - df = tm.makeDataFrame() - df.iloc[:10].to_hdf(path, "df", append=True, format="table") - df.iloc[10:].to_hdf(path, "df", append=True, format="table") - tm.assert_frame_equal(read_hdf(path, "df"), df) + df = tm.makeDataFrame() + df.iloc[:10].to_hdf(path, "df", append=True, format="table") + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) - # append to False - df.iloc[:10].to_hdf(path, "df", append=False, format="table") - df.iloc[10:].to_hdf(path, "df", append=True, format="table") - tm.assert_frame_equal(read_hdf(path, "df"), df) + # append to False + df.iloc[:10].to_hdf(path, "df", append=False, 
format="table") + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_path(setup_path) as path: - df = tm.makeDataFrame() - df.iloc[:10].to_hdf(path, "df", append=True) - df.iloc[10:].to_hdf(path, "df", append=True, format="table") - tm.assert_frame_equal(read_hdf(path, "df"), df) +def test_api_append(tmp_path, setup_path): + path = tmp_path / setup_path - # append to False - df.iloc[:10].to_hdf(path, "df", append=False, format="table") - df.iloc[10:].to_hdf(path, "df", append=True) - tm.assert_frame_equal(read_hdf(path, "df"), df) + df = tm.makeDataFrame() + df.iloc[:10].to_hdf(path, "df", append=True) + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_path(setup_path) as path: + # append to False + df.iloc[:10].to_hdf(path, "df", append=False, format="table") + df.iloc[10:].to_hdf(path, "df", append=True) + tm.assert_frame_equal(read_hdf(path, "df"), df) - df = tm.makeDataFrame() - df.to_hdf(path, "df", append=False, format="fixed") - tm.assert_frame_equal(read_hdf(path, "df"), df) - df.to_hdf(path, "df", append=False, format="f") - tm.assert_frame_equal(read_hdf(path, "df"), df) +def test_api_2(tmp_path, setup_path): + path = tmp_path / setup_path - df.to_hdf(path, "df", append=False) - tm.assert_frame_equal(read_hdf(path, "df"), df) + df = tm.makeDataFrame() + df.to_hdf(path, "df", append=False, format="fixed") + tm.assert_frame_equal(read_hdf(path, "df"), df) - df.to_hdf(path, "df") - tm.assert_frame_equal(read_hdf(path, "df"), df) + df.to_hdf(path, "df", append=False, format="f") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df", append=False) + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df") + tm.assert_frame_equal(read_hdf(path, "df"), df) with ensure_clean_store(setup_path) as store: @@ -139,25 +142,27 @@ def test_api(setup_path): 
store.append("df", df.iloc[10:], append=True, format=None) tm.assert_frame_equal(store.select("df"), df) - with ensure_clean_path(setup_path) as path: - # Invalid. - df = tm.makeDataFrame() - msg = "Can only append to Tables" +def test_api_invalid(tmp_path, setup_path): + path = tmp_path / setup_path + # Invalid. + df = tm.makeDataFrame() - with pytest.raises(ValueError, match=msg): - df.to_hdf(path, "df", append=True, format="f") + msg = "Can only append to Tables" - with pytest.raises(ValueError, match=msg): - df.to_hdf(path, "df", append=True, format="fixed") + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", append=True, format="f") + + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", append=True, format="fixed") - msg = r"invalid HDFStore format specified \[foo\]" + msg = r"invalid HDFStore format specified \[foo\]" - with pytest.raises(TypeError, match=msg): - df.to_hdf(path, "df", append=True, format="foo") + with pytest.raises(TypeError, match=msg): + df.to_hdf(path, "df", append=True, format="foo") - with pytest.raises(TypeError, match=msg): - df.to_hdf(path, "df", append=False, format="foo") + with pytest.raises(TypeError, match=msg): + df.to_hdf(path, "df", append=False, format="foo") # File path doesn't exist path = "" @@ -538,13 +543,13 @@ def test_store_datetime_mixed(setup_path): _check_roundtrip(df, tm.assert_frame_equal, path=setup_path) -def test_round_trip_equals(setup_path): +def test_round_trip_equals(tmp_path, setup_path): # GH 9330 df = DataFrame({"B": [1, 2], "A": ["x", "y"]}) - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table") - other = read_hdf(path, "df") - tm.assert_frame_equal(df, other) - assert df.equals(other) - assert other.equals(df) + path = tmp_path / setup_path + df.to_hdf(path, "df", format="table") + other = read_hdf(path, "df") + tm.assert_frame_equal(df, other) + assert df.equals(other) + assert other.equals(df) diff --git 
a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index e26b47d6615bd..e28c70d83baa7 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -21,7 +21,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) @@ -320,7 +319,7 @@ def test_select_with_many_inputs(setup_path): assert len(result) == 100 -def test_select_iterator(setup_path): +def test_select_iterator(tmp_path, setup_path): # single table with ensure_clean_store(setup_path) as store: @@ -344,29 +343,29 @@ def test_select_iterator(setup_path): result = concat(results) tm.assert_frame_equal(result, expected) - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - df = tm.makeTimeDataFrame(500) - df.to_hdf(path, "df_non_table") + df = tm.makeTimeDataFrame(500) + df.to_hdf(path, "df_non_table") - msg = "can only use an iterator or chunksize on a table" - with pytest.raises(TypeError, match=msg): - read_hdf(path, "df_non_table", chunksize=100) + msg = "can only use an iterator or chunksize on a table" + with pytest.raises(TypeError, match=msg): + read_hdf(path, "df_non_table", chunksize=100) - with pytest.raises(TypeError, match=msg): - read_hdf(path, "df_non_table", iterator=True) + with pytest.raises(TypeError, match=msg): + read_hdf(path, "df_non_table", iterator=True) - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - df = tm.makeTimeDataFrame(500) - df.to_hdf(path, "df", format="table") + df = tm.makeTimeDataFrame(500) + df.to_hdf(path, "df", format="table") - results = list(read_hdf(path, "df", chunksize=100)) - result = concat(results) + results = list(read_hdf(path, "df", chunksize=100)) + result = concat(results) - assert len(results) == 5 - tm.assert_frame_equal(result, df) - tm.assert_frame_equal(result, read_hdf(path, "df")) + assert len(results) == 5 + tm.assert_frame_equal(result, df) + 
tm.assert_frame_equal(result, read_hdf(path, "df")) # multiple @@ -659,69 +658,68 @@ def test_frame_select_complex(setup_path): tm.assert_frame_equal(result, expected) -def test_frame_select_complex2(): - - with ensure_clean_path(["params.hdf", "hist.hdf"]) as paths: +def test_frame_select_complex2(tmp_path): - pp, hh = paths + pp = tmp_path / "params.hdf" + hh = tmp_path / "hist.hdf" - # use non-trivial selection criteria - params = DataFrame({"A": [1, 1, 2, 2, 3]}) - params.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"]) + # use non-trivial selection criteria + params = DataFrame({"A": [1, 1, 2, 2, 3]}) + params.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"]) - selection = read_hdf(pp, "df", where="A=[2,3]") - hist = DataFrame( - np.random.randn(25, 1), - columns=["data"], - index=MultiIndex.from_tuples( - [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"] - ), - ) + selection = read_hdf(pp, "df", where="A=[2,3]") + hist = DataFrame( + np.random.randn(25, 1), + columns=["data"], + index=MultiIndex.from_tuples( + [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"] + ), + ) - hist.to_hdf(hh, "df", mode="w", format="table") + hist.to_hdf(hh, "df", mode="w", format="table") - expected = read_hdf(hh, "df", where="l1=[2, 3, 4]") + expected = read_hdf(hh, "df", where="l1=[2, 3, 4]") - # scope with list like - l0 = selection.index.tolist() # noqa:F841 - store = HDFStore(hh) - result = store.select("df", where="l1=l0") - tm.assert_frame_equal(result, expected) - store.close() + # scope with list like + l0 = selection.index.tolist() # noqa:F841 + store = HDFStore(hh) + result = store.select("df", where="l1=l0") + tm.assert_frame_equal(result, expected) + store.close() - result = read_hdf(hh, "df", where="l1=l0") - tm.assert_frame_equal(result, expected) + result = read_hdf(hh, "df", where="l1=l0") + tm.assert_frame_equal(result, expected) - # index - index = selection.index # noqa:F841 - result = read_hdf(hh, "df", 
where="l1=index") - tm.assert_frame_equal(result, expected) + # index + index = selection.index # noqa:F841 + result = read_hdf(hh, "df", where="l1=index") + tm.assert_frame_equal(result, expected) - result = read_hdf(hh, "df", where="l1=selection.index") - tm.assert_frame_equal(result, expected) + result = read_hdf(hh, "df", where="l1=selection.index") + tm.assert_frame_equal(result, expected) - result = read_hdf(hh, "df", where="l1=selection.index.tolist()") - tm.assert_frame_equal(result, expected) + result = read_hdf(hh, "df", where="l1=selection.index.tolist()") + tm.assert_frame_equal(result, expected) - result = read_hdf(hh, "df", where="l1=list(selection.index)") - tm.assert_frame_equal(result, expected) + result = read_hdf(hh, "df", where="l1=list(selection.index)") + tm.assert_frame_equal(result, expected) - # scope with index - store = HDFStore(hh) + # scope with index + store = HDFStore(hh) - result = store.select("df", where="l1=index") - tm.assert_frame_equal(result, expected) + result = store.select("df", where="l1=index") + tm.assert_frame_equal(result, expected) - result = store.select("df", where="l1=selection.index") - tm.assert_frame_equal(result, expected) + result = store.select("df", where="l1=selection.index") + tm.assert_frame_equal(result, expected) - result = store.select("df", where="l1=selection.index.tolist()") - tm.assert_frame_equal(result, expected) + result = store.select("df", where="l1=selection.index.tolist()") + tm.assert_frame_equal(result, expected) - result = store.select("df", where="l1=list(selection.index)") - tm.assert_frame_equal(result, expected) + result = store.select("df", where="l1=list(selection.index)") + tm.assert_frame_equal(result, expected) - store.close() + store.close() def test_invalid_filtering(setup_path): @@ -965,12 +963,12 @@ def test_query_compare_column_type(setup_path): @pytest.mark.parametrize("where", ["", (), (None,), [], [None]]) -def test_select_empty_where(where): +def 
test_select_empty_where(tmp_path, where): # GH26610 df = DataFrame([1, 2, 3]) - with ensure_clean_path("empty_where.h5") as path: - with HDFStore(path) as store: - store.put("df", df, "t") - result = read_hdf(store, "df", where=where) - tm.assert_frame_equal(result, df) + path = tmp_path / "empty_where.h5" + with HDFStore(path) as store: + store.put("df", df, "t") + result = read_hdf(store, "df", where=where) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index db87c8facbfdb..ccea10f5b2612 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -25,7 +25,6 @@ import pandas._testing as tm from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, safe_close, ) @@ -57,7 +56,7 @@ def test_context(setup_path): assert type(tbl["a"]) == DataFrame -def test_no_track_times(setup_path): +def test_no_track_times(tmp_path, setup_path): # GH 32682 # enables to set track_times (see `pytables` `create_table` documentation) @@ -69,30 +68,30 @@ def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128): h.update(chunk) return h.digest() - def create_h5_and_return_checksum(track_times): - with ensure_clean_path(setup_path) as path: - df = DataFrame({"a": [1]}) + def create_h5_and_return_checksum(tmp_path, track_times): + path = tmp_path / setup_path + df = DataFrame({"a": [1]}) - with HDFStore(path, mode="w") as hdf: - hdf.put( - "table", - df, - format="table", - data_columns=True, - index=None, - track_times=track_times, - ) + with HDFStore(path, mode="w") as hdf: + hdf.put( + "table", + df, + format="table", + data_columns=True, + index=None, + track_times=track_times, + ) - return checksum(path) + return checksum(path) - checksum_0_tt_false = create_h5_and_return_checksum(track_times=False) - checksum_0_tt_true = create_h5_and_return_checksum(track_times=True) + checksum_0_tt_false = 
create_h5_and_return_checksum(tmp_path, track_times=False) + checksum_0_tt_true = create_h5_and_return_checksum(tmp_path, track_times=True) # sleep is necessary to create h5 with different creation time time.sleep(1) - checksum_1_tt_false = create_h5_and_return_checksum(track_times=False) - checksum_1_tt_true = create_h5_and_return_checksum(track_times=True) + checksum_1_tt_false = create_h5_and_return_checksum(tmp_path, track_times=False) + checksum_1_tt_true = create_h5_and_return_checksum(tmp_path, track_times=True) # checksums are the same if track_time = False assert checksum_0_tt_false == checksum_1_tt_false @@ -292,7 +291,7 @@ def test_getattr(setup_path): getattr(store, f"_{x}") -def test_store_dropna(setup_path): +def test_store_dropna(tmp_path, setup_path): df_with_missing = DataFrame( {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]}, index=list("abc"), @@ -303,51 +302,51 @@ def test_store_dropna(setup_path): # # Test to make sure defaults are to not drop. # # Corresponding to Issue 9382 - with ensure_clean_path(setup_path) as path: - df_with_missing.to_hdf(path, "df", format="table") - reloaded = read_hdf(path, "df") - tm.assert_frame_equal(df_with_missing, reloaded) + path = tmp_path / setup_path + df_with_missing.to_hdf(path, "df", format="table") + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) - with ensure_clean_path(setup_path) as path: - df_with_missing.to_hdf(path, "df", format="table", dropna=False) - reloaded = read_hdf(path, "df") - tm.assert_frame_equal(df_with_missing, reloaded) + path = tmp_path / setup_path + df_with_missing.to_hdf(path, "df", format="table", dropna=False) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) - with ensure_clean_path(setup_path) as path: - df_with_missing.to_hdf(path, "df", format="table", dropna=True) - reloaded = read_hdf(path, "df") - tm.assert_frame_equal(df_without_missing, reloaded) + path = tmp_path / setup_path + 
df_with_missing.to_hdf(path, "df", format="table", dropna=True) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_without_missing, reloaded) -def test_to_hdf_with_min_itemsize(setup_path): +def test_to_hdf_with_min_itemsize(tmp_path, setup_path): - with ensure_clean_path(setup_path) as path: + path = tmp_path / setup_path - # min_itemsize in index with to_hdf (GH 10381) - df = tm.makeMixedDataFrame().set_index("C") - df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6}) - # just make sure there is a longer string: - df2 = df.copy().reset_index().assign(C="longer").set_index("C") - df2.to_hdf(path, "ss3", append=True, format="table") - tm.assert_frame_equal(read_hdf(path, "ss3"), concat([df, df2])) + # min_itemsize in index with to_hdf (GH 10381) + df = tm.makeMixedDataFrame().set_index("C") + df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C="longer").set_index("C") + df2.to_hdf(path, "ss3", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "ss3"), concat([df, df2])) - # same as above, with a Series - df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6}) - df2["B"].to_hdf(path, "ss4", append=True, format="table") - tm.assert_series_equal(read_hdf(path, "ss4"), concat([df["B"], df2["B"]])) + # same as above, with a Series + df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6}) + df2["B"].to_hdf(path, "ss4", append=True, format="table") + tm.assert_series_equal(read_hdf(path, "ss4"), concat([df["B"], df2["B"]])) @pytest.mark.parametrize("format", ["fixed", "table"]) -def test_to_hdf_errors(format, setup_path): +def test_to_hdf_errors(tmp_path, format, setup_path): data = ["\ud800foo"] ser = Series(data, index=Index(data)) - with ensure_clean_path(setup_path) as path: - # GH 20835 - ser.to_hdf(path, "table", format=format, errors="surrogatepass") + path = tmp_path / setup_path + # GH 20835 + 
ser.to_hdf(path, "table", format=format, errors="surrogatepass") - result = read_hdf(path, "table", errors="surrogatepass") - tm.assert_series_equal(result, ser) + result = read_hdf(path, "table", errors="surrogatepass") + tm.assert_series_equal(result, ser) def test_create_table_index(setup_path): @@ -556,7 +555,7 @@ def test_store_index_name(setup_path): @pytest.mark.parametrize("table_format", ["table", "fixed"]) -def test_store_index_name_numpy_str(table_format, setup_path): +def test_store_index_name_numpy_str(tmp_path, table_format, setup_path): # GH #13492 idx = Index( pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]), @@ -569,14 +568,14 @@ def test_store_index_name_numpy_str(table_format, setup_path): df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) # This used to fail, returning numpy strings instead of python strings. - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format=table_format) - df2 = read_hdf(path, "df") + path = tmp_path / setup_path + df.to_hdf(path, "df", format=table_format) + df2 = read_hdf(path, "df") - tm.assert_frame_equal(df, df2, check_names=True) + tm.assert_frame_equal(df, df2, check_names=True) - assert type(df2.index.name) == str - assert type(df2.columns.name) == str + assert type(df2.index.name) == str + assert type(df2.columns.name) == str def test_store_series_name(setup_path): @@ -925,20 +924,20 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): do_copy(f=path, propindexes=False) -def test_duplicate_column_name(setup_path): +def test_duplicate_column_name(tmp_path, setup_path): df = DataFrame(columns=["a", "a"], data=[[0, 0]]) - with ensure_clean_path(setup_path) as path: - msg = "Columns index has to be unique for fixed format" - with pytest.raises(ValueError, match=msg): - df.to_hdf(path, "df", format="fixed") + path = tmp_path / setup_path + msg = "Columns index has to be unique for fixed format" + with pytest.raises(ValueError, match=msg): + 
df.to_hdf(path, "df", format="fixed") - df.to_hdf(path, "df", format="table") - other = read_hdf(path, "df") + df.to_hdf(path, "df", format="table") + other = read_hdf(path, "df") - tm.assert_frame_equal(df, other) - assert df.equals(other) - assert other.equals(df) + tm.assert_frame_equal(df, other) + assert df.equals(other) + assert other.equals(df) def test_preserve_timedeltaindex_type(setup_path): @@ -952,7 +951,7 @@ def test_preserve_timedeltaindex_type(setup_path): tm.assert_frame_equal(store["df"], df) -def test_columns_multiindex_modified(setup_path): +def test_columns_multiindex_modified(tmp_path, setup_path): # BUG: 7212 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) @@ -960,26 +959,26 @@ def test_columns_multiindex_modified(setup_path): df = df.set_index(keys="E", append=True) data_columns = df.index.names + df.columns.tolist() - with ensure_clean_path(setup_path) as path: - df.to_hdf( - path, - "df", - mode="a", - append=True, - data_columns=data_columns, - index=False, - ) - cols2load = list("BCD") - cols2load_original = list(cols2load) - # GH#10055 make sure read_hdf call does not alter cols2load inplace - read_hdf(path, "df", columns=cols2load) - assert cols2load_original == cols2load + path = tmp_path / setup_path + df.to_hdf( + path, + "df", + mode="a", + append=True, + data_columns=data_columns, + index=False, + ) + cols2load = list("BCD") + cols2load_original = list(cols2load) + # GH#10055 make sure read_hdf call does not alter cols2load inplace + read_hdf(path, "df", columns=cols2load) + assert cols2load_original == cols2load pytest.mark.filterwarnings("ignore:object name:tables.exceptions.NaturalNameWarning") -def test_to_hdf_with_object_column_names(setup_path): +def test_to_hdf_with_object_column_names(tmp_path, setup_path): # GH9057 types_should_fail = [ @@ -996,28 +995,28 @@ def test_to_hdf_with_object_column_names(setup_path): for index in types_should_fail: df = DataFrame(np.random.randn(10, 2), 
columns=index(2)) - with ensure_clean_path(setup_path) as path: - with catch_warnings(record=True): - msg = "cannot have non-object label DataIndexableCol" - with pytest.raises(ValueError, match=msg): - df.to_hdf(path, "df", format="table", data_columns=True) + path = tmp_path / setup_path + with catch_warnings(record=True): + msg = "cannot have non-object label DataIndexableCol" + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", format="table", data_columns=True) for index in types_should_run: df = DataFrame(np.random.randn(10, 2), columns=index(2)) - with ensure_clean_path(setup_path) as path: - with catch_warnings(record=True): - df.to_hdf(path, "df", format="table", data_columns=True) - result = read_hdf(path, "df", where=f"index = [{df.index[0]}]") - assert len(result) + path = tmp_path / setup_path + with catch_warnings(record=True): + df.to_hdf(path, "df", format="table", data_columns=True) + result = read_hdf(path, "df", where=f"index = [{df.index[0]}]") + assert len(result) -def test_hdfstore_iteritems_deprecated(setup_path): - with ensure_clean_path(setup_path) as path: - df = DataFrame({"a": [1]}) - with HDFStore(path, mode="w") as hdf: - hdf.put("table", df) - with tm.assert_produces_warning(FutureWarning): - next(hdf.iteritems()) +def test_hdfstore_iteritems_deprecated(tmp_path, setup_path): + path = tmp_path / setup_path + df = DataFrame({"a": [1]}) + with HDFStore(path, mode="w") as hdf: + hdf.put("table", df) + with tm.assert_produces_warning(FutureWarning): + next(hdf.iteritems()) def test_hdfstore_strides(setup_path): @@ -1028,14 +1027,14 @@ def test_hdfstore_strides(setup_path): assert df["a"].values.strides == store["df"]["a"].values.strides -def test_store_bool_index(setup_path): +def test_store_bool_index(tmp_path, setup_path): # GH#48667 df = DataFrame([[1]], columns=[True], index=Index([False], dtype="bool")) expected = df.copy() # # Test to make sure defaults are to not drop. 
# # Corresponding to Issue 9382 - with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "a") - result = read_hdf(path, "a") - tm.assert_frame_equal(expected, result) + path = tmp_path / setup_path + df.to_hdf(path, "a") + result = read_hdf(path, "a") + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py index 75b04f332e054..6f871f59afac2 100644 --- a/pandas/tests/io/pytables/test_subclass.py +++ b/pandas/tests/io/pytables/test_subclass.py @@ -5,7 +5,6 @@ Series, ) import pandas._testing as tm -from pandas.tests.io.pytables.common import ensure_clean_path from pandas.io.pytables import ( HDFStore, @@ -15,36 +14,36 @@ class TestHDFStoreSubclass: # GH 33748 - def test_supported_for_subclass_dataframe(self): + def test_supported_for_subclass_dataframe(self, tmp_path): data = {"a": [1, 2], "b": [3, 4]} sdf = tm.SubclassedDataFrame(data, dtype=np.intp) expected = DataFrame(data, dtype=np.intp) - with ensure_clean_path("temp.h5") as path: - sdf.to_hdf(path, "df") - result = read_hdf(path, "df") - tm.assert_frame_equal(result, expected) + path = tmp_path / "temp.h5" + sdf.to_hdf(path, "df") + result = read_hdf(path, "df") + tm.assert_frame_equal(result, expected) - with ensure_clean_path("temp.h5") as path: - with HDFStore(path) as store: - store.put("df", sdf) - result = read_hdf(path, "df") - tm.assert_frame_equal(result, expected) + path = tmp_path / "temp.h5" + with HDFStore(path) as store: + store.put("df", sdf) + result = read_hdf(path, "df") + tm.assert_frame_equal(result, expected) - def test_supported_for_subclass_series(self): + def test_supported_for_subclass_series(self, tmp_path): data = [1, 2, 3] sser = tm.SubclassedSeries(data, dtype=np.intp) expected = Series(data, dtype=np.intp) - with ensure_clean_path("temp.h5") as path: - sser.to_hdf(path, "ser") - result = read_hdf(path, "ser") - tm.assert_series_equal(result, expected) + path = tmp_path / "temp.h5" + 
sser.to_hdf(path, "ser") + result = read_hdf(path, "ser") + tm.assert_series_equal(result, expected) - with ensure_clean_path("temp.h5") as path: - with HDFStore(path) as store: - store.put("ser", sser) - result = read_hdf(path, "ser") - tm.assert_series_equal(result, expected) + path = tmp_path / "temp.h5" + with HDFStore(path) as store: + store.put("ser", sser) + result = read_hdf(path, "ser") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index e235c73123eaa..ba125ffd28581 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -20,7 +20,6 @@ import pandas._testing as tm from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_path, ensure_clean_store, ) @@ -341,7 +340,7 @@ def test_dst_transitions(setup_path): tm.assert_frame_equal(result, df) -def test_read_with_where_tz_aware_index(setup_path): +def test_read_with_where_tz_aware_index(tmp_path, setup_path): # GH 11926 periods = 10 dts = date_range("20151201", periods=periods, freq="D", tz="UTC") @@ -349,11 +348,11 @@ def test_read_with_where_tz_aware_index(setup_path): expected = DataFrame({"MYCOL": 0}, index=mi) key = "mykey" - with ensure_clean_path(setup_path) as path: - with pd.HDFStore(path) as store: - store.append(key, expected, format="table", append=True) - result = pd.read_hdf(path, key, where="DATE > 20151130") - tm.assert_frame_equal(result, expected) + path = tmp_path / setup_path + with pd.HDFStore(path) as store: + store.append(key, expected, format="table", append=True) + result = pd.read_hdf(path, key, where="DATE > 20151130") + tm.assert_frame_equal(result, expected) def test_py2_created_with_datetimez(datapath): From 579b4598c73e6eaf0af394a4a85819c64877021e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 3 Oct 2022 14:54:21 -0700 Subject: [PATCH 2/3] Add importorskip 
--- pandas/tests/io/pytables/test_subclass.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py index 6f871f59afac2..823d2875c5417 100644 --- a/pandas/tests/io/pytables/test_subclass.py +++ b/pandas/tests/io/pytables/test_subclass.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas import ( DataFrame, @@ -11,6 +12,8 @@ read_hdf, ) +pytest.importorskip("tables") + class TestHDFStoreSubclass: # GH 33748 From d6f8380da1ead494a81496d27f45785cd165ed68 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 5 Oct 2022 14:28:43 -0700 Subject: [PATCH 3/3] Cleanup ensure_clean_store --- pandas/tests/io/pytables/common.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index dc02f62e42f78..9446d9df3a038 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -import os +import pathlib import tempfile from typing import Generator @@ -28,24 +28,16 @@ def ensure_clean_store( path, mode="a", complevel=None, complib=None, fletcher32=False ) -> Generator[HDFStore, None, None]: - try: - - # put in the temporary path if we don't have one already - if not len(os.path.dirname(path)): - path = os.path.join(tempfile.gettempdir(), path) - - store = HDFStore( - path, mode=mode, complevel=complevel, complib=complib, fletcher32=False - ) - yield store - finally: - safe_close(store) - if mode == "w" or mode == "a": - if path is not None: - try: - os.remove(path) # noqa: PDF008 - except OSError: - pass + with tempfile.TemporaryDirectory() as tmpdirname: + tmp_path = pathlib.Path(tmpdirname, path) + with HDFStore( + tmp_path, + mode=mode, + complevel=complevel, + complib=complib, + fletcher32=fletcher32, + ) as store: + yield store def 
_maybe_remove(store, key):