diff --git a/pandas/conftest.py b/pandas/conftest.py index e12acb5dd56d5..903e1a5dec132 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,3 +1,23 @@ +""" +This file is very long and growing, but it was decided to not split it yet, as +it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989 + +Instead of splitting it was decided to define sections here: +- Configuration / Settings +- Autouse fixtures +- Common arguments +- Missing values & co. +- Classes +- Indices +- Series' +- DataFrames +- Operators & Operations +- Data sets/files +- Time zones +- Dtypes +- Misc +""" + from collections import abc from datetime import date, time, timedelta, timezone from decimal import Decimal @@ -19,19 +39,11 @@ from pandas.core import ops from pandas.core.indexes.api import Index, MultiIndex -hypothesis.settings.register_profile( - "ci", - # Hypothesis timing checks are tuned for scalars by default, so we bump - # them from 200ms to 500ms per test case as the global default. If this - # is too short for a specific test, (a) try to make it faster, and (b) - # if it really is slow add `@settings(deadline=...)` with a working value, - # or `deadline=None` to entirely disable timeouts for that test. - deadline=500, - suppress_health_check=(hypothesis.HealthCheck.too_slow,), -) -hypothesis.settings.load_profile("ci") - +# ---------------------------------------------------------------- +# Configuration / Settings +# ---------------------------------------------------------------- +# pytest def pytest_addoption(parser): parser.addoption("--skip-slow", action="store_true", help="skip slow tests") parser.addoption("--skip-network", action="store_true", help="skip network tests") @@ -66,6 +78,55 @@ def pytest_runtest_setup(item): pytest.skip("skipping high memory test since --run-high-memory was not set") +# Hypothesis +hypothesis.settings.register_profile( + "ci", + # Hypothesis timing checks are tuned for scalars by default, so we bump + # them from 200ms to 500ms per test case as the global default. If this + # is too short for a specific test, (a) try to make it faster, and (b) + # if it really is slow add `@settings(deadline=...)` with a working value, + # or `deadline=None` to entirely disable timeouts for that test. + deadline=500, + suppress_health_check=(hypothesis.HealthCheck.too_slow,), +) +hypothesis.settings.load_profile("ci") + +# Registering these strategies makes them globally available via st.from_type, +# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py +for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) + ) + +for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-5, 5), + normalize=st.booleans(), + month=st.integers(min_value=1, max_value=12), + ), + ) + +for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-24, 24), + normalize=st.booleans(), + startingMonth=st.integers(min_value=1, max_value=12), + ), + ) + + +# ---------------------------------------------------------------- +# Autouse fixtures +# ---------------------------------------------------------------- @pytest.fixture(autouse=True) def configure_tests(): """ @@ -83,16 +144,9 @@ def add_imports(doctest_namespace): doctest_namespace["pd"] = pd -@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) -def spmatrix(request): - """ - Yields scipy sparse matrix classes. - """ - from scipy import sparse - - return getattr(sparse, request.param + "_matrix") - - +# ---------------------------------------------------------------- +# Common arguments +# ---------------------------------------------------------------- @pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis {repr(x)}") def axis(request): """ @@ -112,19 +166,6 @@ def axis_series(request): return request.param -@pytest.fixture -def ip(): - """ - Get an instance of IPython.InteractiveShell. - - Will raise a skip if IPython is not installed. - """ - pytest.importorskip("IPython", minversion="6.0.0") - from IPython.core.interactiveshell import InteractiveShell - - return InteractiveShell() - - @pytest.fixture(params=[True, False, None]) def observed(request): """ @@ -146,338 +187,547 @@ def ordered_fixture(request): return request.param -_all_arithmetic_operators = [ - "__add__", - "__radd__", - "__sub__", - "__rsub__", - "__mul__", - "__rmul__", - "__floordiv__", - "__rfloordiv__", - "__truediv__", - "__rtruediv__", - "__pow__", - "__rpow__", - "__mod__", - "__rmod__", -] - - -@pytest.fixture(params=_all_arithmetic_operators) -def all_arithmetic_operators(request): +@pytest.fixture(params=["first", "last", False]) +def keep(request): """ - Fixture for dunder names for common arithmetic operations. + Valid values for the 'keep' parameter used in + .duplicated or .drop_duplicates """ return request.param -@pytest.fixture( - params=[ - operator.add, - ops.radd, - operator.sub, - ops.rsub, - operator.mul, - ops.rmul, - operator.truediv, - ops.rtruediv, - operator.floordiv, - ops.rfloordiv, - operator.mod, - ops.rmod, - operator.pow, - ops.rpow, - ] -) -def all_arithmetic_functions(request): +@pytest.fixture(params=["left", "right", "both", "neither"]) +def closed(request): """ - Fixture for operator and roperator arithmetic functions. - - Notes - ----- - This includes divmod and rdivmod, whereas all_arithmetic_operators - does not. + Fixture for trying all interval closed parameters. """ return request.param -_all_numeric_reductions = [ - "sum", - "max", - "min", - "mean", - "prod", - "std", - "var", - "median", - "kurt", - "skew", -] - - -@pytest.fixture(params=_all_numeric_reductions) -def all_numeric_reductions(request): +@pytest.fixture(params=["left", "right", "both", "neither"]) +def other_closed(request): """ - Fixture for numeric reduction names. + Secondary closed fixture to allow parametrizing over all pairs of closed. """ return request.param -_all_boolean_reductions = ["all", "any"] - - -@pytest.fixture(params=_all_boolean_reductions) -def all_boolean_reductions(request): +@pytest.fixture(params=[None, "gzip", "bz2", "zip", "xz"]) +def compression(request): """ - Fixture for boolean reduction names. + Fixture for trying common compression types in compression tests. """ return request.param -_cython_table = pd.core.base.SelectionMixin._cython_table.items() - - -@pytest.fixture(params=list(_cython_table)) -def cython_table_items(request): +@pytest.fixture(params=["gzip", "bz2", "zip", "xz"]) +def compression_only(request): """ - Yields a tuple of a function and its corresponding name. Correspond to - the list of aggregator "Cython functions" used on selected table items. + Fixture for trying common compression types in compression tests excluding + uncompressed case. """ return request.param -def _get_cython_table_params(ndframe, func_names_and_expected): +@pytest.fixture(params=[True, False]) +def writable(request): """ - Combine frame, functions from SelectionMixin._cython_table - keys and expected result. - - Parameters - ---------- - ndframe : DataFrame or Series - func_names_and_expected : Sequence of two items - The first item is a name of a NDFrame method ('sum', 'prod') etc. - The second item is the expected return value. - - Returns - ------- - list - List of three items (DataFrame, function, expected result) + Fixture that an array is writable. """ - results = [] - for func_name, expected in func_names_and_expected: - results.append((ndframe, func_name, expected)) - results += [ - (ndframe, func, expected) - for func, name in _cython_table - if name == func_name - ] - return results + return request.param -@pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]) -def all_compare_operators(request): +@pytest.fixture(params=["inner", "outer", "left", "right"]) +def join_type(request): """ - Fixture for dunder names for common compare operations - - * >= - * > - * == - * != - * < - * <= + Fixture for trying all types of join operations. """ return request.param -@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"]) -def compare_operators_no_eq_ne(request): +@pytest.fixture(params=["nlargest", "nsmallest"]) +def nselect_method(request): """ - Fixture for dunder names for compare operations except == and != - - * >= - * > - * < - * <= + Fixture for trying all nselect methods. """ return request.param -@pytest.fixture( - params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"] -) -def all_logical_operators(request): +# ---------------------------------------------------------------- +# Missing values & co. +# ---------------------------------------------------------------- +@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN"), pd.NA]) +def nulls_fixture(request): """ - Fixture for dunder names for common logical operations - - * | - * & - * ^ + Fixture for each null type in pandas. """ return request.param -@pytest.fixture(params=[None, "gzip", "bz2", "zip", "xz"]) -def compression(request): +nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture + + +@pytest.fixture(params=[None, np.nan, pd.NaT]) +def unique_nulls_fixture(request): """ - Fixture for trying common compression types in compression tests. + Fixture for each null type in pandas, each null type exactly once. """ return request.param -@pytest.fixture(params=["gzip", "bz2", "zip", "xz"]) -def compression_only(request): - """ - Fixture for trying common compression types in compression tests excluding - uncompressed case. - """ - return request.param +# Generate cartesian product of unique_nulls_fixture: +unique_nulls_fixture2 = unique_nulls_fixture -@pytest.fixture(params=[True, False]) -def writable(request): +# ---------------------------------------------------------------- +# Classes +# ---------------------------------------------------------------- +@pytest.fixture(params=[pd.Index, pd.Series], ids=["index", "series"]) +def index_or_series(request): """ - Fixture that an array is writable. + Fixture to parametrize over Index and Series, made necessary by a mypy + bug, giving an error: + + List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" + + See GH#29725 """ return request.param -@pytest.fixture(scope="module") -def datetime_tz_utc(): +@pytest.fixture +def dict_subclass(): """ - Yields the UTC timezone object from the datetime module. + Fixture for a dictionary subclass. """ - return timezone.utc + class TestSubDict(dict): + def __init__(self, *args, **kwargs): + dict.__init__(self, *args, **kwargs) -@pytest.fixture(params=["utc", "dateutil/UTC", utc, tzutc(), timezone.utc]) -def utc_fixture(request): - """ - Fixture to provide variants of UTC timezone strings and tzinfo objects. - """ - return request.param + return TestSubDict -@pytest.fixture(params=["inner", "outer", "left", "right"]) -def join_type(request): +@pytest.fixture +def non_mapping_dict_subclass(): """ - Fixture for trying all types of join operations. + Fixture for a non-mapping dictionary subclass. """ - return request.param + + class TestNonDictMapping(abc.Mapping): + def __init__(self, underlying_dict): + self._data = underlying_dict + + def __getitem__(self, key): + return self._data.__getitem__(key) + + def __iter__(self): + return self._data.__iter__() + + def __len__(self): + return self._data.__len__() + + return TestNonDictMapping +# ---------------------------------------------------------------- +# Indices +# ---------------------------------------------------------------- @pytest.fixture -def strict_data_files(pytestconfig): +def multiindex_year_month_day_dataframe_random_data(): """ - Returns the configuration for the test setting `--strict-data-files`. + DataFrame with 3 level MultiIndex (year, month, day) covering + first 100 business days from 2000-01-01 with random data """ - return pytestconfig.getoption("--strict-data-files") + tdf = tm.makeTimeDataFrame(100) + ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() + # use Int64Index, to make sure things work + ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True) + ymd.index.set_names(["year", "month", "day"], inplace=True) + return ymd -@pytest.fixture -def datapath(strict_data_files): +def _create_multiindex(): + """ + MultiIndex used to test the general functionality of this object """ - Get the path to a data file. - Parameters - ---------- - path : str - Path to the file, relative to ``pandas/tests/`` + # See Also: tests.multi.conftest.idx + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) - Returns - ------- - path including ``pandas/tests``. + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi - Raises - ------ - ValueError - If the path doesn't exist and the --strict-data-files option is set. + +indices_dict = { + "unicode": tm.makeUnicodeIndex(100), + "string": tm.makeStringIndex(100), + "datetime": tm.makeDateIndex(100), + "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), + "period": tm.makePeriodIndex(100), + "timedelta": tm.makeTimedeltaIndex(100), + "int": tm.makeIntIndex(100), + "uint": tm.makeUIntIndex(100), + "range": tm.makeRangeIndex(100), + "float": tm.makeFloatIndex(100), + "bool": tm.makeBoolIndex(10), + "categorical": tm.makeCategoricalIndex(100), + "interval": tm.makeIntervalIndex(100), + "empty": Index([]), + "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), + "multi": _create_multiindex(), + "repeats": Index([0, 0, 1, 1, 2, 2]), +} + + +@pytest.fixture(params=indices_dict.keys()) +def indices(request): """ - BASE_PATH = os.path.join(os.path.dirname(__file__), "tests") + Fixture for many "simple" kinds of indices. - def deco(*args): - path = os.path.join(BASE_PATH, *args) - if not os.path.exists(path): - if strict_data_files: - raise ValueError( - f"Could not find file {path} and --strict-data-files is set." - ) - else: - pytest.skip(f"Could not find {path}.") - return path + These indices are unlikely to cover corner cases, e.g. + - no names + - no NaTs/NaNs + - no values near implementation bounds + - ... + """ + # copy to avoid mutation, e.g. setting .name + return indices_dict[request.param].copy() - return deco + +# ---------------------------------------------------------------- +# Series' +# ---------------------------------------------------------------- +@pytest.fixture +def empty_series(): + return pd.Series([], index=[], dtype=np.float64) @pytest.fixture -def iris(datapath): +def string_series(): """ - The iris dataset as a DataFrame. + Fixture for Series of floats with Index of unique strings """ - return pd.read_csv(datapath("data", "iris.csv")) + s = tm.makeStringSeries() + s.name = "series" + return s -@pytest.fixture(params=["nlargest", "nsmallest"]) -def nselect_method(request): +@pytest.fixture +def object_series(): """ - Fixture for trying all nselect methods. + Fixture for Series of dtype object with Index of unique strings """ - return request.param + s = tm.makeObjectSeries() + s.name = "objects" + return s -@pytest.fixture(params=["first", "last", False]) -def keep(request): +@pytest.fixture +def datetime_series(): """ - Valid values for the 'keep' parameter used in - .duplicated or .drop_duplicates + Fixture for Series of floats with DatetimeIndex """ - return request.param + s = tm.makeTimeSeries() + s.name = "ts" + return s -@pytest.fixture(params=["left", "right", "both", "neither"]) -def closed(request): - """ - Fixture for trying all interval closed parameters. - """ - return request.param +def _create_series(index): + """ Helper for the _series dict """ + size = len(index) + data = np.random.randn(size) + return pd.Series(data, index=index, name="a") -@pytest.fixture(params=["left", "right", "both", "neither"]) -def other_closed(request): +_series = { + f"series-with-{index_id}-index": _create_series(index) + for index_id, index in indices_dict.items() +} + + +@pytest.fixture +def series_with_simple_index(indices): """ - Secondary closed fixture to allow parametrizing over all pairs of closed. + Fixture for tests on series with changing types of indices. """ - return request.param + return _create_series(indices) -@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN"), pd.NA]) -def nulls_fixture(request): +_narrow_dtypes = [ + np.float16, + np.float32, + np.int8, + np.int16, + np.int32, + np.uint8, + np.uint16, + np.uint32, +] +_narrow_series = { + f"{dtype.__name__}-series": tm.makeFloatSeries(name="a").astype(dtype) + for dtype in _narrow_dtypes +} + + +@pytest.fixture(params=_narrow_series.keys()) +def narrow_series(request): """ - Fixture for each null type in pandas. + Fixture for Series with low precision data types """ - return request.param + # copy to avoid mutation, e.g. setting .name + return _narrow_series[request.param].copy() -nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture +_index_or_series_objs = {**indices_dict, **_series, **_narrow_series} -@pytest.fixture(params=[None, np.nan, pd.NaT]) -def unique_nulls_fixture(request): +@pytest.fixture(params=_index_or_series_objs.keys()) +def index_or_series_obj(request): """ - Fixture for each null type in pandas, each null type exactly once. + Fixture for tests on indexes, series and series with a narrow dtype + copy to avoid mutation, e.g. setting .name """ - return request.param + return _index_or_series_objs[request.param].copy(deep=True) -# Generate cartesian product of unique_nulls_fixture: -unique_nulls_fixture2 = unique_nulls_fixture +# ---------------------------------------------------------------- +# DataFrames +# ---------------------------------------------------------------- +@pytest.fixture +def float_frame(): + """ + Fixture for DataFrame of floats with index of unique strings + Columns are ['A', 'B', 'C', 'D']. -TIMEZONES = [ - None, - "UTC", - "US/Eastern", - "Asia/Tokyo", + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... + IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()) + + +# ---------------------------------------------------------------- +# Operators & Operations +# ---------------------------------------------------------------- +_all_arithmetic_operators = [ + "__add__", + "__radd__", + "__sub__", + "__rsub__", + "__mul__", + "__rmul__", + "__floordiv__", + "__rfloordiv__", + "__truediv__", + "__rtruediv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", +] + + +@pytest.fixture(params=_all_arithmetic_operators) +def all_arithmetic_operators(request): + """ + Fixture for dunder names for common arithmetic operations. + """ + return request.param + + +@pytest.fixture( + params=[ + operator.add, + ops.radd, + operator.sub, + ops.rsub, + operator.mul, + ops.rmul, + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + operator.pow, + ops.rpow, + ] +) +def all_arithmetic_functions(request): + """ + Fixture for operator and roperator arithmetic functions. + + Notes + ----- + This includes divmod and rdivmod, whereas all_arithmetic_operators + does not. + """ + return request.param + + +_all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + "prod", + "std", + "var", + "median", + "kurt", + "skew", +] + + +@pytest.fixture(params=_all_numeric_reductions) +def all_numeric_reductions(request): + """ + Fixture for numeric reduction names. + """ + return request.param + + +_all_boolean_reductions = ["all", "any"] + + +@pytest.fixture(params=_all_boolean_reductions) +def all_boolean_reductions(request): + """ + Fixture for boolean reduction names. + """ + return request.param + + +@pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]) +def all_compare_operators(request): + """ + Fixture for dunder names for common compare operations + + * >= + * > + * == + * != + * < + * <= + """ + return request.param + + +@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"]) +def compare_operators_no_eq_ne(request): + """ + Fixture for dunder names for compare operations except == and != + + * >= + * > + * < + * <= + """ + return request.param + + +@pytest.fixture( + params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"] +) +def all_logical_operators(request): + """ + Fixture for dunder names for common logical operations + + * | + * & + * ^ + """ + return request.param + + +# ---------------------------------------------------------------- +# Data sets/files +# ---------------------------------------------------------------- +@pytest.fixture +def strict_data_files(pytestconfig): + """ + Returns the configuration for the test setting `--strict-data-files`. + """ + return pytestconfig.getoption("--strict-data-files") + + +@pytest.fixture +def datapath(strict_data_files): + """ + Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + BASE_PATH = os.path.join(os.path.dirname(__file__), "tests") + + def deco(*args): + path = os.path.join(BASE_PATH, *args) + if not os.path.exists(path): + if strict_data_files: + raise ValueError( + f"Could not find file {path} and --strict-data-files is set." + ) + else: + pytest.skip(f"Could not find {path}.") + return path + + return deco + + +@pytest.fixture +def iris(datapath): + """ + The iris dataset as a DataFrame. + """ + return pd.read_csv(datapath("data", "iris.csv")) + + +# ---------------------------------------------------------------- +# Time zones +# ---------------------------------------------------------------- +TIMEZONES = [ + None, + "UTC", + "US/Eastern", + "Asia/Tokyo", "dateutil/US/Pacific", "dateutil/Asia/Singapore", tzutc(), @@ -514,6 +764,22 @@ def tz_aware_fixture(request): tz_aware_fixture2 = tz_aware_fixture +@pytest.fixture(scope="module") +def datetime_tz_utc(): + """ + Yields the UTC timezone object from the datetime module. + """ + return timezone.utc + + +@pytest.fixture(params=["utc", "dateutil/UTC", utc, tzutc(), timezone.utc]) +def utc_fixture(request): + """ + Fixture to provide variants of UTC timezone strings and tzinfo objects. + """ + return request.param + + # ---------------------------------------------------------------- # Dtypes # ---------------------------------------------------------------- @@ -827,291 +1093,81 @@ def any_skipna_inferred_dtype(request): return inferred_dtype, values -@pytest.fixture( - params=[ - getattr(pd.offsets, o) - for o in pd.offsets.__all__ - if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) - ] -) -def tick_classes(request): - """ - Fixture for Tick based datetime offsets available for a time series. +# ---------------------------------------------------------------- +# Misc +# ---------------------------------------------------------------- +@pytest.fixture +def ip(): """ - return request.param + Get an instance of IPython.InteractiveShell. + Will raise a skip if IPython is not installed. + """ + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.interactiveshell import InteractiveShell -# ---------------------------------------------------------------- -# Global setup for tests using Hypothesis + return InteractiveShell() -# Registering these strategies makes them globally available via st.from_type, -# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py -for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): - cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy( - cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) - ) - -for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): - cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy( - cls, - st.builds( - cls, - n=st.integers(-5, 5), - normalize=st.booleans(), - month=st.integers(min_value=1, max_value=12), - ), - ) - -for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): - cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy( - cls, - st.builds( - cls, - n=st.integers(-24, 24), - normalize=st.booleans(), - startingMonth=st.integers(min_value=1, max_value=12), - ), - ) - - -@pytest.fixture -def empty_series(): - return pd.Series([], index=[], dtype=np.float64) - - -@pytest.fixture -def datetime_series(): - """ - Fixture for Series of floats with DatetimeIndex - """ - s = tm.makeTimeSeries() - s.name = "ts" - return s - - -@pytest.fixture -def string_series(): - """ - Fixture for Series of floats with Index of unique strings - """ - s = tm.makeStringSeries() - s.name = "series" - return s - - -@pytest.fixture -def object_series(): - """ - Fixture for Series of dtype object with Index of unique strings +@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) +def spmatrix(request): """ - s = tm.makeObjectSeries() - s.name = "objects" - return s - - -@pytest.fixture -def float_frame(): + Yields scipy sparse matrix classes. """ - Fixture for DataFrame of floats with index of unique strings + from scipy import sparse - Columns are ['A', 'B', 'C', 'D']. + return getattr(sparse, request.param + "_matrix") - A B C D - P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 - qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 - tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 - wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 - M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 - QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 - r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 - ... ... ... ... ... - IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 - lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 - qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 - yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 - 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 - eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 - xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 - [30 rows x 4 columns] - """ - return DataFrame(tm.getSeriesData()) +_cython_table = pd.core.base.SelectionMixin._cython_table.items() -@pytest.fixture(params=[pd.Index, pd.Series], ids=["index", "series"]) -def index_or_series(request): +@pytest.fixture(params=list(_cython_table)) +def cython_table_items(request): """ - Fixture to parametrize over Index and Series, made necessary by a mypy - bug, giving an error: - - List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" - - See GH#29725 + Yields a tuple of a function and its corresponding name. Correspond to + the list of aggregator "Cython functions" used on selected table items. """ return request.param -@pytest.fixture -def dict_subclass(): - """ - Fixture for a dictionary subclass. - """ - - class TestSubDict(dict): - def __init__(self, *args, **kwargs): - dict.__init__(self, *args, **kwargs) - - return TestSubDict - - -@pytest.fixture -def non_mapping_dict_subclass(): - """ - Fixture for a non-mapping dictionary subclass. - """ - - class TestNonDictMapping(abc.Mapping): - def __init__(self, underlying_dict): - self._data = underlying_dict - - def __getitem__(self, key): - return self._data.__getitem__(key) - - def __iter__(self): - return self._data.__iter__() - - def __len__(self): - return self._data.__len__() - - return TestNonDictMapping - - -def _gen_mi(): - # a MultiIndex used to test the general functionality of this object - - # See Also: tests.multi.conftest.idx - major_axis = Index(["foo", "bar", "baz", "qux"]) - minor_axis = Index(["one", "two"]) - - major_codes = np.array([0, 0, 1, 2, 3, 3]) - minor_codes = np.array([0, 1, 0, 1, 0, 1]) - index_names = ["first", "second"] - mi = MultiIndex( - levels=[major_axis, minor_axis], - codes=[major_codes, minor_codes], - names=index_names, - verify_integrity=False, - ) - return mi - - -indices_dict = { - "unicode": tm.makeUnicodeIndex(100), - "string": tm.makeStringIndex(100), - "datetime": tm.makeDateIndex(100), - "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), - "period": tm.makePeriodIndex(100), - "timedelta": tm.makeTimedeltaIndex(100), - "int": tm.makeIntIndex(100), - "uint": tm.makeUIntIndex(100), - "range": tm.makeRangeIndex(100), - "float": tm.makeFloatIndex(100), - "bool": tm.makeBoolIndex(10), - "categorical": tm.makeCategoricalIndex(100), - "interval": tm.makeIntervalIndex(100), - "empty": Index([]), - "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), - "multi": _gen_mi(), - "repeats": Index([0, 0, 1, 1, 2, 2]), -} - - -@pytest.fixture(params=indices_dict.keys()) -def indices(request): - """ - Fixture for many "simple" kinds of indices. - - These indices are unlikely to cover corner cases, e.g. - - no names - - no NaTs/NaNs - - no values near implementation bounds - - ... - """ - # copy to avoid mutation, e.g. setting .name - return indices_dict[request.param].copy() - - -def _create_series(index): - """ Helper for the _series dict """ - size = len(index) - data = np.random.randn(size) - return pd.Series(data, index=index, name="a") - - -_series = { - f"series-with-{index_id}-index": _create_series(index) - for index_id, index in indices_dict.items() -} - - -@pytest.fixture -def series_with_simple_index(indices): - """ - Fixture for tests on series with changing types of indices. - """ - return _create_series(indices) - - -_narrow_dtypes = [ - np.float16, - np.float32, - np.int8, - np.int16, - np.int32, - np.uint8, - np.uint16, - np.uint32, -] -_narrow_series = { - f"{dtype.__name__}-series": tm.makeFloatSeries(name="a").astype(dtype) - for dtype in _narrow_dtypes -} - - -@pytest.fixture(params=_narrow_series.keys()) -def narrow_series(request): - """ - Fixture for Series with low precision data types +def _get_cython_table_params(ndframe, func_names_and_expected): """ - # copy to avoid mutation, e.g. setting .name - return _narrow_series[request.param].copy() - - -_index_or_series_objs = {**indices_dict, **_series, **_narrow_series} + Combine frame, functions from SelectionMixin._cython_table + keys and expected result. + Parameters + ---------- + ndframe : DataFrame or Series + func_names_and_expected : Sequence of two items + The first item is a name of a NDFrame method ('sum', 'prod') etc. + The second item is the expected return value. -@pytest.fixture(params=_index_or_series_objs.keys()) -def index_or_series_obj(request): - """ - Fixture for tests on indexes, series and series with a narrow dtype - copy to avoid mutation, e.g. setting .name + Returns + ------- + list + List of three items (DataFrame, function, expected result) """ - return _index_or_series_objs[request.param].copy(deep=True) + results = [] + for func_name, expected in func_names_and_expected: + results.append((ndframe, func_name, expected)) + results += [ + (ndframe, func, expected) + for func, name in _cython_table + if name == func_name + ] + return results -@pytest.fixture -def multiindex_year_month_day_dataframe_random_data(): +@pytest.fixture( + params=[ + getattr(pd.offsets, o) + for o in pd.offsets.__all__ + if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) + ] +) +def tick_classes(request): """ - DataFrame with 3 level MultiIndex (year, month, day) covering - first 100 business days from 2000-01-01 with random data + Fixture for Tick based datetime offsets available for a time series. """ - tdf = tm.makeTimeDataFrame(100) - ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() - # use Int64Index, to make sure things work - ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True) - ymd.index.set_names(["year", "month", "day"], inplace=True) - return ymd + return request.param