diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py
index f11ec5f8a36a0..13efc0f60ecef 100644
--- a/pandas/tests/arrays/masked/test_arrow_compat.py
+++ b/pandas/tests/arrays/masked/test_arrow_compat.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import pandas._testing as tm
 
-pa = pytest.importorskip("pyarrow", minversion="1.0.1")
+pa = pytest.importorskip("pyarrow")
 
 from pandas.core.arrays.arrow._arrow_utils import pyarrow_array_to_numpy_and_mask
 
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index b3bc2c7166130..a801a845bc7be 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -95,7 +95,7 @@ def test_constructor_valid_string_type_value_dictionary(chunked):
 
 def test_constructor_from_list():
     # GH#27673
-    pytest.importorskip("pyarrow", minversion="1.0.0")
+    pytest.importorskip("pyarrow")
     result = pd.Series(["E"], dtype=StringDtype(storage="pyarrow"))
     assert isinstance(result.dtype, StringDtype)
     assert result.dtype.storage == "pyarrow"
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index cb44453f55e5e..adbc4a1bb729b 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -60,7 +60,7 @@
 )
 from pandas.tests.extension import base
 
-pa = pytest.importorskip("pyarrow", minversion="10.0.1")
+pa = pytest.importorskip("pyarrow")
 
 from pandas.core.arrays.arrow.array import ArrowExtensionArray
 from pandas.core.arrays.arrow.extension_types import ArrowPeriodType
diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py
index 0bb7ad4fd274d..309c4b7b57e84 100644
--- a/pandas/tests/groupby/test_filters.py
+++ b/pandas/tests/groupby/test_filters.py
@@ -190,9 +190,9 @@ def test_filter_pdna_is_false():
     tm.assert_series_equal(res, ser[[]])
 
 
-def test_filter_against_workaround():
+def test_filter_against_workaround_ints():
     # Series of ints
-    s = Series(np.random.default_rng(2).integers(0, 100, 1000))
+    s = Series(np.random.default_rng(2).integers(0, 100, 100))
     grouper = s.apply(lambda x: np.round(x, -1))
     grouped = s.groupby(grouper)
     f = lambda x: x.mean() > 10
@@ -201,8 +201,10 @@ def test_filter_against_workaround():
     new_way = grouped.filter(f)
     tm.assert_series_equal(new_way.sort_values(), old_way.sort_values())
 
+
+def test_filter_against_workaround_floats():
     # Series of floats
-    s = 100 * Series(np.random.default_rng(2).random(1000))
+    s = 100 * Series(np.random.default_rng(2).random(100))
     grouper = s.apply(lambda x: np.round(x, -1))
     grouped = s.groupby(grouper)
     f = lambda x: x.mean() > 10
@@ -210,9 +212,11 @@ def test_filter_against_workaround():
     new_way = grouped.filter(f)
     tm.assert_series_equal(new_way.sort_values(), old_way.sort_values())
 
+
+def test_filter_against_workaround_dataframe():
     # Set up DataFrame of ints, floats, strings.
     letters = np.array(list(ascii_lowercase))
-    N = 1000
+    N = 100
     random_letters = letters.take(
         np.random.default_rng(2).integers(0, 26, N, dtype=int)
     )
diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
index c4e23154b7ffc..0ebb88afb6c86 100644
--- a/pandas/tests/indexes/datetimes/test_partial_slicing.py
+++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -9,6 +9,7 @@
     DataFrame,
     DatetimeIndex,
     Index,
+    MultiIndex,
     Series,
     Timedelta,
     Timestamp,
@@ -360,10 +361,12 @@ def test_partial_slicing_with_multiindex(self):
     def test_partial_slicing_with_multiindex_series(self):
         # GH 4294
         # partial slice on a series mi
-        ser = DataFrame(
-            np.random.default_rng(2).random((1000, 1000)),
-            index=date_range("2000-1-1", periods=1000),
-        ).stack(future_stack=True)
+        ser = Series(
+            range(250),
+            index=MultiIndex.from_product(
+                [date_range("2000-1-1", periods=50), range(5)]
+            ),
+        )
         s2 = ser[:-1].copy()
 
         expected = s2["2000-1-4"]
diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py
index 26ef732635d1c..d956747cbc859 100644
--- a/pandas/tests/indexes/multi/test_integrity.py
+++ b/pandas/tests/indexes/multi/test_integrity.py
@@ -125,18 +125,20 @@ def test_consistency():
 
 
 @pytest.mark.slow
-def test_hash_collisions():
+def test_hash_collisions(monkeypatch):
     # non-smoke test that we don't get hash collisions
+    size_cutoff = 50
+    with monkeypatch.context() as m:
+        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
+        index = MultiIndex.from_product(
+            [np.arange(8), np.arange(8)], names=["one", "two"]
+        )
+        result = index.get_indexer(index.values)
+        tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))
 
-    index = MultiIndex.from_product(
-        [np.arange(1000), np.arange(1000)], names=["one", "two"]
-    )
-    result = index.get_indexer(index.values)
-    tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))
-
-    for i in [0, 1, len(index) - 2, len(index) - 1]:
-        result = index.get_loc(index[i])
-        assert result == i
+        for i in [0, 1, len(index) - 2, len(index) - 1]:
+            result = index.get_loc(index[i])
+            assert result == i
 
 
 def test_dims():
@@ -170,22 +172,29 @@ def test_isna_behavior(idx):
         pd.isna(idx)
 
 
-def test_large_multiindex_error():
+def test_large_multiindex_error(monkeypatch):
     # GH12527
-    df_below_1000000 = pd.DataFrame(
-        1, index=MultiIndex.from_product([[1, 2], range(499999)]), columns=["dest"]
-    )
-    with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
-        df_below_1000000.loc[(-1, 0), "dest"]
-    with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
-        df_below_1000000.loc[(3, 0), "dest"]
-    df_above_1000000 = pd.DataFrame(
-        1, index=MultiIndex.from_product([[1, 2], range(500001)]), columns=["dest"]
-    )
-    with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
-        df_above_1000000.loc[(-1, 0), "dest"]
-    with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
-        df_above_1000000.loc[(3, 0), "dest"]
+    size_cutoff = 50
+    with monkeypatch.context() as m:
+        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
+        df_below_cutoff = pd.DataFrame(
+            1,
+            index=MultiIndex.from_product([[1, 2], range(size_cutoff - 1)]),
+            columns=["dest"],
+        )
+        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
+            df_below_cutoff.loc[(-1, 0), "dest"]
+        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
+            df_below_cutoff.loc[(3, 0), "dest"]
+        df_above_cutoff = pd.DataFrame(
+            1,
+            index=MultiIndex.from_product([[1, 2], range(size_cutoff + 1)]),
+            columns=["dest"],
+        )
+        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
+            df_above_cutoff.loc[(-1, 0), "dest"]
+        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
+            df_above_cutoff.loc[(3, 0), "dest"]
 
 
 def test_mi_hashtable_populated_attribute_error(monkeypatch):
diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py
index 32adbc693390b..30e3f5cee05b4 100644
--- a/pandas/tests/indexes/test_old_base.py
+++ b/pandas/tests/indexes/test_old_base.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from datetime import datetime
-import gc
+import weakref
 
 import numpy as np
 import pytest
@@ -744,10 +744,11 @@ def test_is_unique(self, simple_index):
     @pytest.mark.arm_slow
     def test_engine_reference_cycle(self, simple_index):
         # GH27585
-        index = simple_index
-        nrefs_pre = len(gc.get_referrers(index))
+        index = simple_index.copy()
+        ref = weakref.ref(index)
         index._engine
-        assert len(gc.get_referrers(index)) == nrefs_pre
+        del index
+        assert ref() is None
 
     def test_getitem_2d_deprecated(self, simple_index):
         # GH#30588, GH#31479
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index 701bfe3767db4..d3552ab5d39f5 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -96,7 +96,7 @@ def s3_base(worker_id, monkeysession):
         yield "http://localhost:5000"
     else:
         requests = pytest.importorskip("requests")
-        pytest.importorskip("moto", minversion="1.3.14")
+        pytest.importorskip("moto")
         pytest.importorskip("flask")  # server mode needs flask too
 
         # Launching moto in server mode, i.e., as a separate process
diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py
index c029acf0c8938..f5d78fbd44812 100644
--- a/pandas/tests/reshape/merge/test_multi.py
+++ b/pandas/tests/reshape/merge/test_multi.py
@@ -95,7 +95,7 @@ def test_left_join_multi_index(self, sort):
         def bind_cols(df):
             iord = lambda a: 0 if a != a else ord(a)
             f = lambda ts: ts.map(iord) - ord("a")
-            return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 1e4
+            return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 10
 
         def run_asserts(left, right, sort):
             res = left.join(right, on=icols, how="left", sort=sort)
@@ -119,13 +119,13 @@ def run_asserts(left, right, sort):
 
         lc = list(map(chr, np.arange(ord("a"), ord("z") + 1)))
         left = DataFrame(
-            np.random.default_rng(2).choice(lc, (5000, 2)), columns=["1st", "3rd"]
+            np.random.default_rng(2).choice(lc, (50, 2)), columns=["1st", "3rd"]
         )
         # Explicit cast to float to avoid implicit cast when setting nan
         left.insert(
             1,
             "2nd",
-            np.random.default_rng(2).integers(0, 1000, len(left)).astype("float"),
+            np.random.default_rng(2).integers(0, 10, len(left)).astype("float"),
         )
 
         i = np.random.default_rng(2).permutation(len(left))
@@ -138,9 +138,9 @@ def run_asserts(left, right, sort):
         run_asserts(left, right, sort)
 
         # inject some nulls
-        left.loc[1::23, "1st"] = np.nan
-        left.loc[2::37, "2nd"] = np.nan
-        left.loc[3::43, "3rd"] = np.nan
+        left.loc[1::4, "1st"] = np.nan
+        left.loc[2::5, "2nd"] = np.nan
+        left.loc[3::6, "3rd"] = np.nan
         left["4th"] = bind_cols(left)
 
         i = np.random.default_rng(2).permutation(len(left))
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index 08a1c8e3aebb2..5903255118d40 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -100,7 +100,7 @@ def test_xarray(df):
 def test_xarray_cftimeindex_nearest():
     # https://github.com/pydata/xarray/issues/3751
     cftime = pytest.importorskip("cftime")
-    xarray = pytest.importorskip("xarray", minversion="0.21.0")
+    xarray = pytest.importorskip("xarray")
 
     times = xarray.cftime_range("0001", periods=2)
     key = cftime.DatetimeGregorian(2000, 1, 1)