diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 25da4f8eb8278..adc52a506fcaf 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -102,7 +102,6 @@
     RNGContext,
     decompress_file,
     ensure_clean,
-    ensure_clean_dir,
     ensure_safe_environment_variables,
     set_timezone,
     use_numexpr,
@@ -1087,7 +1086,6 @@ def shares_memory(left, right) -> bool:
     "EMPTY_STRING_PATTERN",
     "ENDIAN",
     "ensure_clean",
-    "ensure_clean_dir",
     "ensure_safe_environment_variables",
     "equalContents",
     "external_error_raised",
diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py
index d0f4b4b14acc6..ee94c1d3aae0c 100644
--- a/pandas/_testing/contexts.py
+++ b/pandas/_testing/contexts.py
@@ -3,7 +3,6 @@
 from contextlib import contextmanager
 import os
 from pathlib import Path
-from shutil import rmtree
 import tempfile
 from types import TracebackType
 from typing import (
@@ -129,25 +128,6 @@ def ensure_clean(
         path.unlink()
 
 
-@contextmanager
-def ensure_clean_dir() -> Generator[str, None, None]:
-    """
-    Get a temporary directory path and agrees to remove on close.
-
-    Yields
-    ------
-    Temporary directory path
-    """
-    directory_name = tempfile.mkdtemp(suffix="")
-    try:
-        yield directory_name
-    finally:
-        try:
-            rmtree(directory_name)
-        except OSError:
-            pass
-
-
 @contextmanager
 def ensure_safe_environment_variables() -> Generator[None, None, None]:
     """
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index b5096934af4cb..34ab87d727a12 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -1,6 +1,5 @@
 import io
 import os
-from pathlib import Path
 import sys
 from zipfile import ZipFile
 
@@ -615,16 +614,15 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
             ("archive.zip", "archive"),
         ],
     )
-    def test_to_csv_zip_infer_name(self, filename, expected_arcname):
+    def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname):
         # GH 39465
         df = DataFrame({"ABC": [1]})
-        with tm.ensure_clean_dir() as dir:
-            path = Path(dir, filename)
-            df.to_csv(path, compression="zip")
-            with ZipFile(path) as zp:
-                assert len(zp.filelist) == 1
-                archived_file = zp.filelist[0].filename
-                assert archived_file == expected_arcname
+        path = tmp_path / filename
+        df.to_csv(path, compression="zip")
+        with ZipFile(path) as zp:
+            assert len(zp.filelist) == 1
+            archived_file = zp.filelist[0].filename
+            assert archived_file == expected_arcname
 
     @pytest.mark.parametrize("df_new_type", ["Int64"])
     def test_to_csv_na_rep_long_string(self, df_new_type):
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index 125d078ff39b1..89ecde58735a7 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -298,22 +298,21 @@ def test_ambiguous_archive_zip():
         pd.read_csv(path)
 
 
-def test_ambiguous_archive_tar():
-    with tm.ensure_clean_dir() as dir:
-        csvAPath = os.path.join(dir, "a.csv")
-        with open(csvAPath, "w") as a:
-            a.write("foo,bar\n")
-        csvBPath = os.path.join(dir, "b.csv")
-        with open(csvBPath, "w") as b:
-            b.write("foo,bar\n")
-
-        tarpath = os.path.join(dir, "archive.tar")
-        with tarfile.TarFile(tarpath, "w") as tar:
-            tar.add(csvAPath, "a.csv")
-            tar.add(csvBPath, "b.csv")
-
-        with pytest.raises(ValueError, match="Multiple files found in TAR archive"):
-            pd.read_csv(tarpath)
+def test_ambiguous_archive_tar(tmp_path):
+    csvAPath = tmp_path / "a.csv"
+    with open(csvAPath, "w") as a:
+        a.write("foo,bar\n")
+    csvBPath = tmp_path / "b.csv"
+    with open(csvBPath, "w") as b:
+        b.write("foo,bar\n")
+
+    tarpath = tmp_path / "archive.tar"
+    with tarfile.TarFile(tarpath, "w") as tar:
+        tar.add(csvAPath, "a.csv")
+        tar.add(csvBPath, "b.csv")
+
+    with pytest.raises(ValueError, match="Multiple files found in TAR archive"):
+        pd.read_csv(tarpath)
 
 
 def test_tar_gz_to_different_filename():
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 64e4a15a42061..66312468b53c9 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -879,37 +879,36 @@ def test_expand_user(self, df_compat, monkeypatch):
         with pytest.raises(OSError, match=r".*TestingUser.*"):
             df_compat.to_parquet("~/file.parquet")
 
-    def test_partition_cols_supported(self, pa, df_full):
+    def test_partition_cols_supported(self, tmp_path, pa, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
         df = df_full
-        with tm.ensure_clean_dir() as path:
-            df.to_parquet(path, partition_cols=partition_cols, compression=None)
-            check_partition_names(path, partition_cols)
-            assert read_parquet(path).shape == df.shape
+        df.to_parquet(tmp_path, partition_cols=partition_cols, compression=None)
+        check_partition_names(tmp_path, partition_cols)
+        assert read_parquet(tmp_path).shape == df.shape
 
-    def test_partition_cols_string(self, pa, df_full):
+    def test_partition_cols_string(self, tmp_path, pa, df_full):
         # GH #27117
         partition_cols = "bool"
         partition_cols_list = [partition_cols]
         df = df_full
-        with tm.ensure_clean_dir() as path:
-            df.to_parquet(path, partition_cols=partition_cols, compression=None)
-            check_partition_names(path, partition_cols_list)
-            assert read_parquet(path).shape == df.shape
+        df.to_parquet(tmp_path, partition_cols=partition_cols, compression=None)
+        check_partition_names(tmp_path, partition_cols_list)
+        assert read_parquet(tmp_path).shape == df.shape
 
-    @pytest.mark.parametrize("path_type", [str, pathlib.Path])
-    def test_partition_cols_pathlib(self, pa, df_compat, path_type):
+    @pytest.mark.parametrize(
+        "path_type", [str, lambda x: x], ids=["string", "pathlib.Path"]
+    )
+    def test_partition_cols_pathlib(self, tmp_path, pa, df_compat, path_type):
         # GH 35902
 
         partition_cols = "B"
         partition_cols_list = [partition_cols]
         df = df_compat
 
-        with tm.ensure_clean_dir() as path_str:
-            path = path_type(path_str)
-            df.to_parquet(path, partition_cols=partition_cols_list)
-            assert read_parquet(path).shape == df.shape
+        path = path_type(tmp_path)
+        df.to_parquet(path, partition_cols=partition_cols_list)
+        assert read_parquet(path).shape == df.shape
 
     def test_empty_dataframe(self, pa):
         # GH #27339
@@ -1082,58 +1081,57 @@ def test_s3_roundtrip(self, df_compat, s3_resource, fp, s3so):
             write_kwargs={"compression": None, "storage_options": s3so},
         )
 
-    def test_partition_cols_supported(self, fp, df_full):
+    def test_partition_cols_supported(self, tmp_path, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
         df = df_full
-        with tm.ensure_clean_dir() as path:
-            df.to_parquet(
-                path,
-                engine="fastparquet",
-                partition_cols=partition_cols,
-                compression=None,
-            )
-            assert os.path.exists(path)
-            import fastparquet
+        df.to_parquet(
+            tmp_path,
+            engine="fastparquet",
+            partition_cols=partition_cols,
+            compression=None,
+        )
+        assert os.path.exists(tmp_path)
+        import fastparquet
 
-            actual_partition_cols = fastparquet.ParquetFile(path, False).cats
-            assert len(actual_partition_cols) == 2
+        actual_partition_cols = fastparquet.ParquetFile(str(tmp_path), False).cats
+        assert len(actual_partition_cols) == 2
 
-    def test_partition_cols_string(self, fp, df_full):
+    def test_partition_cols_string(self, tmp_path, fp, df_full):
         # GH #27117
         partition_cols = "bool"
         df = df_full
-        with tm.ensure_clean_dir() as path:
-            df.to_parquet(
-                path,
-                engine="fastparquet",
-                partition_cols=partition_cols,
-                compression=None,
-            )
-            assert os.path.exists(path)
-            import fastparquet
+        df.to_parquet(
+            tmp_path,
+            engine="fastparquet",
+            partition_cols=partition_cols,
+            compression=None,
+        )
+        assert os.path.exists(tmp_path)
+        import fastparquet
 
-            actual_partition_cols = fastparquet.ParquetFile(path, False).cats
-            assert len(actual_partition_cols) == 1
+        actual_partition_cols = fastparquet.ParquetFile(str(tmp_path), False).cats
+        assert len(actual_partition_cols) == 1
 
-    def test_partition_on_supported(self, fp, df_full):
+    def test_partition_on_supported(self, tmp_path, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
         df = df_full
-        with tm.ensure_clean_dir() as path:
-            df.to_parquet(
-                path,
-                engine="fastparquet",
-                compression=None,
-                partition_on=partition_cols,
-            )
-            assert os.path.exists(path)
-            import fastparquet
+        df.to_parquet(
+            tmp_path,
+            engine="fastparquet",
+            compression=None,
+            partition_on=partition_cols,
+        )
+        assert os.path.exists(tmp_path)
+        import fastparquet
 
-            actual_partition_cols = fastparquet.ParquetFile(path, False).cats
-            assert len(actual_partition_cols) == 2
+        actual_partition_cols = fastparquet.ParquetFile(str(tmp_path), False).cats
+        assert len(actual_partition_cols) == 2
 
-    def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
+    def test_error_on_using_partition_cols_and_partition_on(
+        self, tmp_path, fp, df_full
+    ):
         # GH #23283
         partition_cols = ["bool", "int"]
         df = df_full
@@ -1142,14 +1140,13 @@ def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
             "partitioning data"
         )
         with pytest.raises(ValueError, match=msg):
-            with tm.ensure_clean_dir() as path:
-                df.to_parquet(
-                    path,
-                    engine="fastparquet",
-                    compression=None,
-                    partition_on=partition_cols,
-                    partition_cols=partition_cols,
-                )
+            df.to_parquet(
+                tmp_path,
+                engine="fastparquet",
+                compression=None,
+                partition_on=partition_cols,
+                partition_cols=partition_cols,
+            )
 
     def test_empty_dataframe(self, fp):
         # GH #27339
diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py
index d73a789b876f4..b7ed9415ecb90 100644
--- a/pandas/tests/util/test_util.py
+++ b/pandas/tests/util/test_util.py
@@ -44,13 +44,6 @@ def test_convert_rows_list_to_csv_str():
     assert ret == expected
 
 
-def test_create_temp_directory():
-    with tm.ensure_clean_dir() as path:
-        assert os.path.exists(path)
-        assert os.path.isdir(path)
-    assert not os.path.exists(path)
-
-
 @pytest.mark.parametrize("strict_data_files", [True, False])
 def test_datapath_missing(datapath):
     with pytest.raises(ValueError, match="Could not find file"):
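Migration note (not part of the diff itself): every removed `tm.ensure_clean_dir()` block above is replaced by pytest's built-in `tmp_path` fixture, which yields a unique, empty `pathlib.Path` per test and whose cleanup pytest manages itself, so no try/finally or `rmtree` bookkeeping is needed. A minimal sketch of the pattern follows; the test name and frame contents are illustrative, not taken from the diff.

# Minimal sketch of the tmp_path pattern adopted in the diff above.
# The test name and DataFrame contents are illustrative only.
import pandas as pd


def test_csv_roundtrip(tmp_path):
    # tmp_path is a fresh, empty pathlib.Path supplied by pytest for this test.
    df = pd.DataFrame({"a": [1, 2]})
    path = tmp_path / "frame.csv"  # pathlib-style joining replaces os.path.join
    df.to_csv(path, index=False)
    result = pd.read_csv(path)
    pd.testing.assert_frame_equal(result, df)

One behavioral difference is visible in the fastparquet hunks: `ensure_clean_dir` yielded a `str`, while `tmp_path` is a `pathlib.Path`, which is why the calls above now pass `str(tmp_path)` to `fastparquet.ParquetFile`.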