diff --git a/pandas/tests/apply/conftest.py b/pandas/tests/apply/conftest.py index 7ed9fc88c3aea..acccdd845b53c 100644 --- a/pandas/tests/apply/conftest.py +++ b/pandas/tests/apply/conftest.py @@ -18,7 +18,7 @@ def int_frame_const_col(): return df -@pytest.fixture(params=["python", "numba"]) +@pytest.fixture(params=["python", pytest.param("numba", marks=pytest.mark.single_cpu)]) def engine(request): if request.param == "numba": pytest.importorskip("numba") diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 25eb2ccb18361..312d6f6d37dde 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -392,27 +392,57 @@ def test_missing_axis_specification_exception(self): with pytest.raises(ValueError, match=r"axis=0 or 1"): df.align(series) - def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): + @pytest.mark.parametrize("method", ["pad", "bfill"]) + @pytest.mark.parametrize("axis", [0, 1, None]) + @pytest.mark.parametrize("fill_axis", [0, 1]) + @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) + @pytest.mark.parametrize( + "left_slice", + [ + [slice(4), slice(10)], + [slice(0), slice(0)], + ], + ) + @pytest.mark.parametrize( + "right_slice", + [ + [slice(2, None), slice(6, None)], + [slice(0), slice(0)], + ], + ) + @pytest.mark.parametrize("limit", [1, None]) + def test_align_fill_method( + self, how, method, axis, fill_axis, float_frame, left_slice, right_slice, limit + ): + frame = float_frame + left = frame.iloc[left_slice[0], left_slice[1]] + right = frame.iloc[right_slice[0], right_slice[1]] + msg = ( "The 'method', 'limit', and 'fill_axis' keywords in DataFrame.align " "are deprecated" ) with tm.assert_produces_warning(FutureWarning, match=msg): - aa, ab = a.align( - b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis + aa, ab = left.align( + right, + axis=axis, + join=how, + method=method, + limit=limit, + fill_axis=fill_axis, ) join_index, join_columns = None, None - ea, eb = a, b + ea, eb = left, right if axis is None or axis == 0: - join_index = a.index.join(b.index, how=how) + join_index = left.index.join(right.index, how=how) ea = ea.reindex(index=join_index) eb = eb.reindex(index=join_index) if axis is None or axis == 1: - join_columns = a.columns.join(b.columns, how=how) + join_columns = left.columns.join(right.columns, how=how) ea = ea.reindex(columns=join_columns) eb = eb.reindex(columns=join_columns) @@ -424,42 +454,6 @@ def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): tm.assert_frame_equal(aa, ea) tm.assert_frame_equal(ab, eb) - @pytest.mark.parametrize("meth", ["pad", "bfill"]) - @pytest.mark.parametrize("ax", [0, 1, None]) - @pytest.mark.parametrize("fax", [0, 1]) - @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) - def test_align_fill_method(self, how, meth, ax, fax, float_frame): - df = float_frame - self._check_align_fill(df, how, meth, ax, fax) - - def _check_align_fill(self, frame, kind, meth, ax, fax): - left = frame.iloc[0:4, :10] - right = frame.iloc[2:, 6:] - empty = frame.iloc[:0, :0] - - self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - - # empty left - self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - - # empty right - self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - - # both empty - self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - def test_align_series_check_copy(self): # GH# df = DataFrame({0: [1, 2]}) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index a65677bba35e4..eae7e46c7ec35 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from pandas._libs import index as libindex from pandas.errors import ( InvalidIndexError, PerformanceWarning, @@ -843,11 +844,12 @@ def test_contains_td64_level(self): assert "element_not_exit" not in idx assert "0 day 09:30:00" in idx - @pytest.mark.slow - def test_large_mi_contains(self): + def test_large_mi_contains(self, monkeypatch): # GH#10645 - result = MultiIndex.from_arrays([range(10**6), range(10**6)]) - assert (10**6, 0) not in result + with monkeypatch.context(): + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 10) + result = MultiIndex.from_arrays([range(10), range(10)]) + assert (10, 0) not in result def test_timestamp_multiindex_indexer(): diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index ae25724972fde..cabfee9aa040a 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas._libs import index as libindex from pandas.compat import IS64 import pandas as pd @@ -72,15 +73,18 @@ def test_getitem_non_matching(self, series_with_interval_index, indexer_sl): with pytest.raises(KeyError, match=r"\[-1\] not in index"): indexer_sl(ser)[[-1, 3]] - @pytest.mark.slow - def test_loc_getitem_large_series(self): - ser = Series( - np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001)) - ) - - result1 = ser.loc[:80000] - result2 = ser.loc[0:80000] - result3 = ser.loc[0:80000:1] + def test_loc_getitem_large_series(self, monkeypatch): + size_cutoff = 20 + with monkeypatch.context(): + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff) + ser = Series( + np.arange(size_cutoff), + index=IntervalIndex.from_breaks(np.arange(size_cutoff + 1)), + ) + + result1 = ser.loc[:8] + result2 = ser.loc[0:8] + result3 = ser.loc[0:8:1] tm.assert_series_equal(result1, result2) tm.assert_series_equal(result1, result3) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 2914bf4a3be05..1fe3a1275d8d6 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas._libs import index as libindex from pandas.errors import SettingWithCopyError import pandas.util._test_decorators as td @@ -69,15 +70,16 @@ def test_cache_updating(using_copy_on_write): assert result == 2 -@pytest.mark.slow -def test_indexer_caching(): +def test_indexer_caching(monkeypatch): # GH5727 # make sure that indexers are in the _internal_names_set - n = 1000001 - index = MultiIndex.from_arrays([np.arange(n), np.arange(n)]) - ser = Series(np.zeros(n), index=index) + size_cutoff = 20 + with monkeypatch.context(): + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff) + index = MultiIndex.from_arrays([np.arange(size_cutoff), np.arange(size_cutoff)]) + s = Series(np.zeros(size_cutoff), index=index) - # setitem - expected = Series(np.ones(n), index=index) - ser[ser == 0] = 1 - tm.assert_series_equal(ser, expected) + # setitem + s[s == 0] = 1 + expected = Series(np.ones(size_cutoff), index=index) + tm.assert_series_equal(s, expected) diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index 9643cf3258e64..5550daebac0b2 100644 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -124,7 +124,7 @@ def _create_sp_frame(): return DataFrame(data, index=dates).apply(SparseArray) -def create_data(): +def create_pickle_data(): """create the pickle data""" data = { "A": [0.0, 1.0, 2.0, 3.0, np.nan], @@ -282,12 +282,6 @@ def create_data(): } -def create_pickle_data(): - data = create_data() - - return data - - def platform_name(): return "_".join( [ diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index e2d785a38eb51..fef5414e85e52 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -298,6 +298,8 @@ def test_empty_field_eof(self): } assert_array_dicts_equal(result, expected) + @pytest.mark.parametrize("repeat", range(10)) + def test_empty_field_eof_mem_access_bug(self, repeat): # GH5664 a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"]) b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1]) @@ -312,21 +314,20 @@ def test_empty_field_eof(self): index=[0, 5, 7, 12], ) - for _ in range(100): - df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c") - tm.assert_frame_equal(df, a) + df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c") + tm.assert_frame_equal(df, a) - df = read_csv( - StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c" - ) - tm.assert_frame_equal(df, b) + df = read_csv( + StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c" + ) + tm.assert_frame_equal(df, b) - df = read_csv( - StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"), - names=list("abcd"), - engine="c", - ) - tm.assert_frame_equal(df, c) + df = read_csv( + StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"), + names=list("abcd"), + engine="c", + ) + tm.assert_frame_equal(df, c) def test_empty_csv_input(self): # GH14867 diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index bb8f0ce214c96..780b25fd0f346 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -10,6 +10,8 @@ 3. Move the created pickle to "data/legacy_pickle/" directory. """ +from __future__ import annotations + from array import array import bz2 import datetime @@ -22,6 +24,7 @@ import pickle import shutil import tarfile +from typing import Any import uuid import zipfile @@ -52,12 +55,6 @@ ) -@pytest.fixture -def current_pickle_data(): - # our current version pickle data - return create_pickle_data() - - # --------------------- # comparison functions # --------------------- @@ -173,6 +170,15 @@ def python_unpickler(path): return pickle.load(fh) +def flatten(data: dict) -> list[tuple[str, Any]]: + """Flatten create_pickle_data""" + return [ + (typ, example) + for typ, examples in data.items() + for example in examples.values() + ] + + @pytest.mark.parametrize( "pickle_writer", [ @@ -190,29 +196,27 @@ def python_unpickler(path): ], ) @pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler]) -def test_round_trip_current(current_pickle_data, pickle_writer, writer): - data = current_pickle_data - for typ, dv in data.items(): - for dt, expected in dv.items(): - with tm.ensure_clean() as path: - # test writing with each pickler - pickle_writer(expected, path) - - # test reading with each unpickler - result = pd.read_pickle(path) - compare_element(result, expected, typ) - - result = python_unpickler(path) - compare_element(result, expected, typ) - - # and the same for file objects (GH 35679) - with open(path, mode="wb") as handle: - writer(expected, path) - handle.seek(0) # shouldn't close file handle - with open(path, mode="rb") as handle: - result = pd.read_pickle(handle) - handle.seek(0) # shouldn't close file handle - compare_element(result, expected, typ) +@pytest.mark.parametrize("typ, expected", flatten(create_pickle_data())) +def test_round_trip_current(typ, expected, pickle_writer, writer): + with tm.ensure_clean() as path: + # test writing with each pickler + pickle_writer(expected, path) + + # test reading with each unpickler + result = pd.read_pickle(path) + compare_element(result, expected, typ) + + result = python_unpickler(path) + compare_element(result, expected, typ) + + # and the same for file objects (GH 35679) + with open(path, mode="wb") as handle: + writer(expected, path) + handle.seek(0) # shouldn't close file handle + with open(path, mode="rb") as handle: + result = pd.read_pickle(handle) + handle.seek(0) # shouldn't close file handle + compare_element(result, expected, typ) def test_pickle_path_pathlib(): diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 105ffe84a0703..f288921c25753 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -197,7 +197,7 @@ def test_concat_duplicates_in_index_with_keys(self): @pytest.mark.parametrize("axis", [0, 1]) def test_concat_copies(self, axis, order, ignore_index, using_copy_on_write): # based on asv ConcatDataFrames - df = DataFrame(np.zeros((10000, 200), dtype=np.float32, order=order)) + df = DataFrame(np.zeros((10, 5), dtype=np.float32, order=order)) res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True)