diff --git a/.circleci/config.yml b/.circleci/config.yml index ea93575ac9430..e06c80f88a5f2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,7 +9,7 @@ jobs: ENV_FILE: ci/deps/circle-310-arm64.yaml PYTEST_WORKERS: auto PATTERN: "not single_cpu and not slow and not network and not clipboard and not arm_slow and not db" - PYTEST_TARGET: "pandas" + PYTEST_TARGET: "pandas/tests" PANDAS_CI: "1" steps: - checkout @@ -40,7 +40,7 @@ jobs: - run: | . ~/virtualenvs/pandas-dev/bin/activate export PANDAS_CI=1 - python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas/tests --junitxml=test-data.xml build-aarch64: parameters: cibw-build: @@ -67,6 +67,28 @@ jobs: elif ! (curl https://api.github.com/repos/pandas-dev/pandas/issues/$CIRCLE_PR_NUMBER | jq '.labels' | grep -q 'Build'); then circleci-agent step halt fi + - run: + # The process here is more complicated than on Github Actions + # since we need to copy the tests wheel into the unzipped sdist + # so that it gets copied (along with the contents of the unzipped sdist) + # into the Linux docker image used for building the wheel + # (unlike Github Actions there is no host access using cibuildwheel with CircleCI) + name: Build the sdist + command: | + pip3 install build setuptools-scm wheel + python3 -m build --sdist + sdist_name=$(ls ./dist/*.tar.gz) + mkdir unzipped-sdist + tar -xzf $sdist_name -C unzipped-sdist --strip-components=1 + cd pandas + # Have to disable isolation otherwise + # we won't be able to pull the version correctly + python3 -m build --wheel --no-isolation --outdir ../unzipped-sdist + cd .. 
+ echo "sdist_name=./unzipped-sdist" >> "$BASH_ENV" + echo "pandas_tests_loc=$(ls ./unzipped-sdist/*.whl)" >> "$BASH_ENV" + ls ./unzipped-sdist/*.whl + - run: name: Build aarch64 wheels no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that @@ -76,7 +98,8 @@ jobs: if [[ "$IS_SCHEDULE_DISPATCH" == "true" || "$IS_PUSH" != 'true' ]]; then export CIBW_ENVIRONMENT="PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" fi - cibuildwheel --prerelease-pythons --output-dir wheelhouse + export CIBW_BEFORE_TEST="pip install {project}/${pandas_tests_loc}" + cibuildwheel ${sdist_name} --prerelease-pythons --output-dir wheelhouse environment: CIBW_BUILD: << parameters.cibw-build >> diff --git a/.gitattributes b/.gitattributes index 19c6fd2fd1d47..1ce5bd95ba53a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -77,8 +77,5 @@ Dockerfile export-ignore environment.yml export-ignore setup.py export-ignore - -# GH 39321 -# csv_dir_path fixture checks the existence of the directory -# exclude the whole directory to avoid running related tests in sdist -pandas/tests/io/parser/data export-ignore +# Strip tests from the distribution +pandas/tests export-ignore diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index f93950224eaae..da8250b7af595 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -77,7 +77,7 @@ jobs: PANDAS_CI: ${{ matrix.pandas_ci || '1' }} TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }} - PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} + PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas/tests' }} NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }} # Clipboard tests QT_QPA_PLATFORM: offscreen @@ -180,7 +180,7 @@ jobs: cancel-in-progress: true env: PANDAS_CI: 1 - PYTEST_TARGET: pandas + PYTEST_TARGET: pandas/tests 
PATTERN: "not slow and not db and not network and not single_cpu" PYTEST_WORKERS: 'auto' @@ -232,7 +232,7 @@ jobs: python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir export PANDAS_CI=1 - python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas/tests --junitxml=test-data.xml concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit @@ -274,7 +274,7 @@ jobs: run: | . ~/virtualenvs/pandas-dev/bin/activate export PANDAS_CI=1 - python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas/tests --junitxml=test-data.xml concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl @@ -322,7 +322,7 @@ jobs: PYTEST_WORKERS: "auto" PANDAS_CI: 1 PATTERN: "not slow and not network and not clipboard and not single_cpu" - PYTEST_TARGET: pandas + PYTEST_TARGET: pandas/tests steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 470c044d2e99e..6a1bb9dda8f6f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -46,6 +46,7 @@ jobs: IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} outputs: sdist_file: ${{ steps.save-path.outputs.sdist_name }} + pandas_tests_loc: ${{ steps.save-tests-path.outputs.pandas_tests_loc }} steps: - name: Checkout pandas uses: actions/checkout@v4 @@ -59,22 +60,34 @@ jobs: - name: Build sdist run: | - python -m pip install build + python -m 
pip install build wheel setuptools-scm python -m build --sdist + - name: Build pandas_tests + run: | + cd pandas + # we want to place the pandas_tests wheel in the same + # dist directory as the sdist + python -m build --wheel --outdir ../dist --no-isolation + - uses: actions/upload-artifact@v4 with: - name: sdist + name: sdist-and-tests path: ./dist/* - - name: Sanity check sdist files + - name: Sanity check built files run: | ls ./dist - name: Output sdist name id: save-path shell: bash -el {0} - run: echo "sdist_name=$(ls ./dist)" >> "$GITHUB_OUTPUT" + run: echo "sdist_name=$(ls ./dist/*.tar.gz)" >> "$GITHUB_OUTPUT" + + - name: Output pandas_tests location + id: save-tests-path + shell: bash -el {0} + run: echo "pandas_tests_loc=$(ls ./dist/*.whl)" >> "$GITHUB_OUTPUT" build_wheels: needs: build_sdist @@ -109,44 +122,40 @@ jobs: with: fetch-depth: 0 - # TODO: Build wheels from sdist again - # There's some sort of weird race condition? - # within Github that makes the sdist be missing files - - # We need to build wheels from the sdist since the sdist - # removes unnecessary files from the release - - name: Download sdist (not macOS) - #if: ${{ matrix.buildplat[1] != 'macosx_*' }} + - name: Download sdist and pandas_tests uses: actions/download-artifact@v4 with: - name: sdist + name: sdist-and-tests path: ./dist - - name: Output sdist name (macOS) - id: save-path - shell: bash -el {0} - run: echo "sdist_name=$(ls ./dist)" >> "$GITHUB_ENV" - - # Python version used to build sdist doesn't matter - # wheel will be built from sdist with the correct version + # The sdist will be corrupted by cibuildwheel/macOS sometimes somehow + # (I think this is a Github Actions bug) - name: Unzip sdist (macOS) if: ${{ startsWith(matrix.buildplat[1], 'macosx') }} run: | - tar -xzf ./dist/${{ env.sdist_name }} -C ./dist + tar -xzf ${{ needs.build_sdist.outputs.sdist_file }} -C ./dist - - name: Output sdist name (macOS) + - name: Output new sdist name (macOS) id: save-path2 + if: ${{ 
startsWith(matrix.buildplat[1], 'macosx') }} shell: bash -el {0} - run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV" + run: echo "sdist_name=./dist/$(cd ./dist && ls -d */)" >> "$GITHUB_ENV" - name: Build normal wheels if: ${{ (env.IS_SCHEDULE_DISPATCH != 'true' || env.IS_PUSH == 'true') }} uses: pypa/cibuildwheel@v2.17.0 with: - package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }} + package-dir: ${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }} env: CIBW_PRERELEASE_PYTHONS: True CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} + # Note: Since the sdist is the project directory, it does not contain the tests + # We need to manually install tests from the host. This isn't ideal since it breaks + # isolation but should be no big deal + # (On Linux, the path must be prefixed with /host, since builds run in a container and the host + # filesystem is mounted at /host) + CIBW_BEFORE_TEST: > + pip install ${{ startsWith(matrix.buildplat[0], 'ubuntu') && '/host'|| '' }}${{ github.workspace }}/${{ needs.build_sdist.outputs.pandas_tests_loc }} - name: Build nightly wheels (with NumPy pre-release) if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }} @@ -154,7 +163,7 @@ jobs: with: - package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }} + package-dir: ${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }} env: - # The nightly wheels should be build witht he NumPy 2.0 pre-releases + # The nightly wheels should be built with the NumPy 2.0 pre-releases # which requires the additional URL.
CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple CIBW_PRERELEASE_PYTHONS: True @@ -185,6 +194,7 @@ jobs: $TST_CMD = @" python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0; python -m pip install `$(Get-Item pandas\wheelhouse\*.whl); + python -m pip install `$(Get-Item pandas\dist\pandas_tests*.whl); python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`'; "@ # add rc to the end of the image name if the Python version is unreleased @@ -192,6 +202,7 @@ jobs: docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD - uses: actions/upload-artifact@v4 + if: always() with: name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} path: ./wheelhouse/*.whl diff --git a/MANIFEST.in b/MANIFEST.in index 9894381ed6252..e3c874d9bb5ea 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -54,10 +54,9 @@ global-exclude *.h global-exclude *.py[ocd] global-exclude *.pxi -# GH 39321 -# csv_dir_path fixture checks the existence of the directory -# exclude the whole directory to avoid running related tests in sdist -prune pandas/tests/io/parser/data +# Remove tests +# Those will be distributed in a separate package +prune pandas/tests # Selectively re-add *.cxx files that were excluded above graft pandas/_libs/src diff --git a/meson.build b/meson.build index 06623a305ab54..1fbeb688ee676 100644 --- a/meson.build +++ b/meson.build @@ -46,7 +46,7 @@ endif # Needed by pandas.test() when it looks for the pytest ini options py.install_sources( - 'pyproject.toml', + 'pandas/pyproject.toml', subdir: 'pandas' ) diff --git a/pandas/__init__.py b/pandas/__init__.py index 3ee6f6abf97bf..ce42a1d67f757 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -189,6 +189,19 @@ 
__git_version__ = v.get("full-revisionid") del get_versions, v +import sys + +try: + import pandas_tests # pyright: ignore [reportMissingImports] + + sys.modules["pandas.tests"] = pandas_tests + + del pandas_tests +except ImportError: + pass + +del sys + # module level doc-string __doc__ = """ diff --git a/pandas/conftest.py b/pandas/conftest.py index 65410c3c09494..04a81e21185b8 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,120 +1,25 @@ """ -This file is very long and growing, but it was decided to not split it yet, as -it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989 +Just a conftest file for doctest stuff -Instead of splitting it was decided to define sections here: -- Configuration / Settings -- Autouse fixtures -- Common arguments -- Missing values & co. -- Classes -- Indices -- Series' -- DataFrames -- Operators & Operations -- Data sets/files -- Time zones -- Dtypes -- Misc +The main conftest file is in pandas/tests/conftest.py """ -from __future__ import annotations - -from collections import abc -from datetime import ( - date, - datetime, - time, - timedelta, - timezone, -) -from decimal import Decimal -import gc -import operator -import os -from typing import ( - TYPE_CHECKING, - Callable, -) -import uuid - -from dateutil.tz import ( - tzlocal, - tzutc, -) -import hypothesis -from hypothesis import strategies as st -import numpy as np import pytest -from pytz import ( - FixedOffset, - utc, -) - -import pandas.util._test_decorators as td - -from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, - IntervalDtype, -) - -import pandas as pd -from pandas import ( - CategoricalIndex, - DataFrame, - Interval, - IntervalIndex, - Period, - RangeIndex, - Series, - Timedelta, - Timestamp, - date_range, - period_range, - timedelta_range, -) -import pandas._testing as tm -from pandas.core import ops -from pandas.core.indexes.api import ( - Index, - MultiIndex, -) -from pandas.util.version import Version - -if TYPE_CHECKING: - from 
collections.abc import ( - Hashable, - Iterator, - ) - -try: - import pyarrow as pa -except ImportError: - has_pyarrow = False -else: - del pa - has_pyarrow = True -import zoneinfo -try: - zoneinfo.ZoneInfo("UTC") -except zoneinfo.ZoneInfoNotFoundError: - zoneinfo = None # type: ignore[assignment] - - -# ---------------------------------------------------------------- -# Configuration / Settings -# ---------------------------------------------------------------- -# pytest +# https://github.com/pytest-dev/pytest/issues/11873 +# Would like to avoid autouse=True, but cannot as of pytest 8.0.0 +@pytest.fixture(autouse=True) +def add_doctest_imports(doctest_namespace) -> None: + """ + Make `np` and `pd` names available for doctests. + """ + import numpy as np + import pandas as pd -def pytest_addoption(parser) -> None: - parser.addoption( - "--no-strict-data-files", - action="store_false", - help="Don't fail if a test is skipped for missing data file.", - ) + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None: @@ -180,1848 +85,3 @@ def pytest_collection_modifyitems(items, config) -> None: for item in items: for path, message in ignored_doctest_warnings: ignore_doctest_warning(item, path, message) - - -hypothesis_health_checks = [hypothesis.HealthCheck.too_slow] -if Version(hypothesis.__version__) >= Version("6.83.2"): - hypothesis_health_checks.append(hypothesis.HealthCheck.differing_executors) - -# Hypothesis -hypothesis.settings.register_profile( - "ci", - # Hypothesis timing checks are tuned for scalars by default, so we bump - # them from 200ms to 500ms per test case as the global default. If this - # is too short for a specific test, (a) try to make it faster, and (b) - # if it really is slow add `@settings(deadline=...)` with a working value, - # or `deadline=None` to entirely disable timeouts for that test. - # 2022-02-09: Changed deadline from 500 -> None. 
Deadline leads to - # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969) - deadline=None, - suppress_health_check=tuple(hypothesis_health_checks), -) -hypothesis.settings.load_profile("ci") - -# Registering these strategies makes them globally available via st.from_type, -# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py -for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): - cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy( - cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) - ) - -for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): - cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy( - cls, - st.builds( - cls, - n=st.integers(-5, 5), - normalize=st.booleans(), - month=st.integers(min_value=1, max_value=12), - ), - ) - -for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): - cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy( - cls, - st.builds( - cls, - n=st.integers(-24, 24), - normalize=st.booleans(), - startingMonth=st.integers(min_value=1, max_value=12), - ), - ) - - -# ---------------------------------------------------------------- -# Autouse fixtures -# ---------------------------------------------------------------- - - -# https://github.com/pytest-dev/pytest/issues/11873 -# Would like to avoid autouse=True, but cannot as of pytest 8.0.0 -@pytest.fixture(autouse=True) -def add_doctest_imports(doctest_namespace) -> None: - """ - Make `np` and `pd` names available for doctests. - """ - doctest_namespace["np"] = np - doctest_namespace["pd"] = pd - - -@pytest.fixture(autouse=True) -def configure_tests() -> None: - """ - Configure settings for all tests and test modules. 
- """ - pd.set_option("chained_assignment", "raise") - - -# ---------------------------------------------------------------- -# Common arguments -# ---------------------------------------------------------------- -@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={x!r}") -def axis(request): - """ - Fixture for returning the axis numbers of a DataFrame. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def observed(request): - """ - Pass in the observed keyword to groupby for [True, False] - This indicates whether categoricals should return values for - values which are not in the grouper [False / None], or only values which - appear in the grouper [True]. [None] is supported for future compatibility - if we decide to change the default (and would need to warn if this - parameter is not passed). - """ - return request.param - - -@pytest.fixture(params=[True, False, None]) -def ordered(request): - """ - Boolean 'ordered' parameter for Categorical. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def dropna(request): - """ - Boolean 'dropna' parameter. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def sort(request): - """ - Boolean 'sort' parameter. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def skipna(request): - """ - Boolean 'skipna' parameter. - """ - return request.param - - -@pytest.fixture(params=["first", "last", False]) -def keep(request): - """ - Valid values for the 'keep' parameter used in - .duplicated or .drop_duplicates - """ - return request.param - - -@pytest.fixture(params=["both", "neither", "left", "right"]) -def inclusive_endpoints_fixture(request): - """ - Fixture for trying all interval 'inclusive' parameters. - """ - return request.param - - -@pytest.fixture(params=["left", "right", "both", "neither"]) -def closed(request): - """ - Fixture for trying all interval closed parameters. 
- """ - return request.param - - -@pytest.fixture(params=["left", "right", "both", "neither"]) -def other_closed(request): - """ - Secondary closed fixture to allow parametrizing over all pairs of closed. - """ - return request.param - - -@pytest.fixture( - params=[ - None, - "gzip", - "bz2", - "zip", - "xz", - "tar", - pytest.param("zstd", marks=td.skip_if_no("zstandard")), - ] -) -def compression(request): - """ - Fixture for trying common compression types in compression tests. - """ - return request.param - - -@pytest.fixture( - params=[ - "gzip", - "bz2", - "zip", - "xz", - "tar", - pytest.param("zstd", marks=td.skip_if_no("zstandard")), - ] -) -def compression_only(request): - """ - Fixture for trying common compression types in compression tests excluding - uncompressed case. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def writable(request): - """ - Fixture that an array is writable. - """ - return request.param - - -@pytest.fixture(params=["inner", "outer", "left", "right"]) -def join_type(request): - """ - Fixture for trying all types of join operations. - """ - return request.param - - -@pytest.fixture(params=["nlargest", "nsmallest"]) -def nselect_method(request): - """ - Fixture for trying all nselect methods. - """ - return request.param - - -@pytest.fixture(params=[None, "ignore"]) -def na_action(request): - """ - Fixture for 'na_action' argument in map. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def ascending(request): - """ - Fixture for 'na_action' argument in sort_values/sort_index/rank. - """ - return request.param - - -@pytest.fixture(params=["average", "min", "max", "first", "dense"]) -def rank_method(request): - """ - Fixture for 'rank' argument in rank. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def as_index(request): - """ - Fixture for 'as_index' argument in groupby. 
- """ - return request.param - - -@pytest.fixture(params=[True, False]) -def cache(request): - """ - Fixture for 'cache' argument in to_datetime. - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def parallel(request): - """ - Fixture for parallel keyword argument for numba.jit. - """ - return request.param - - -# Can parameterize nogil & nopython over True | False, but limiting per -# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472 - - -@pytest.fixture(params=[False]) -def nogil(request): - """ - Fixture for nogil keyword argument for numba.jit. - """ - return request.param - - -@pytest.fixture(params=[True]) -def nopython(request): - """ - Fixture for nopython keyword argument for numba.jit. - """ - return request.param - - -# ---------------------------------------------------------------- -# Missing values & co. -# ---------------------------------------------------------------- -@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__) -def nulls_fixture(request): - """ - Fixture for each null type in pandas. - """ - return request.param - - -nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture - - -@pytest.fixture(params=[None, np.nan, pd.NaT]) -def unique_nulls_fixture(request): - """ - Fixture for each null type in pandas, each null type exactly once. - """ - return request.param - - -# Generate cartesian product of unique_nulls_fixture: -unique_nulls_fixture2 = unique_nulls_fixture - - -@pytest.fixture(params=tm.NP_NAT_OBJECTS, ids=lambda x: type(x).__name__) -def np_nat_fixture(request): - """ - Fixture for each NaT type in numpy. 
- """ - return request.param - - -# Generate cartesian product of np_nat_fixture: -np_nat_fixture2 = np_nat_fixture - - -# ---------------------------------------------------------------- -# Classes -# ---------------------------------------------------------------- - - -@pytest.fixture(params=[DataFrame, Series]) -def frame_or_series(request): - """ - Fixture to parametrize over DataFrame and Series. - """ - return request.param - - -@pytest.fixture(params=[Index, Series], ids=["index", "series"]) -def index_or_series(request): - """ - Fixture to parametrize over Index and Series, made necessary by a mypy - bug, giving an error: - - List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" - - See GH#29725 - """ - return request.param - - -@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"]) -def index_or_series_or_array(request): - """ - Fixture to parametrize over Index, Series, and ExtensionArray - """ - return request.param - - -@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__) -def box_with_array(request): - """ - Fixture to test behavior for Index, Series, DataFrame, and pandas Array - classes - """ - return request.param - - -box_with_array2 = box_with_array - - -@pytest.fixture -def dict_subclass() -> type[dict]: - """ - Fixture for a dictionary subclass. - """ - - class TestSubDict(dict): - def __init__(self, *args, **kwargs) -> None: - dict.__init__(self, *args, **kwargs) - - return TestSubDict - - -@pytest.fixture -def non_dict_mapping_subclass() -> type[abc.Mapping]: - """ - Fixture for a non-mapping dictionary subclass. 
- """ - - class TestNonDictMapping(abc.Mapping): - def __init__(self, underlying_dict) -> None: - self._data = underlying_dict - - def __getitem__(self, key): - return self._data.__getitem__(key) - - def __iter__(self) -> Iterator: - return self._data.__iter__() - - def __len__(self) -> int: - return self._data.__len__() - - return TestNonDictMapping - - -# ---------------------------------------------------------------- -# Indices -# ---------------------------------------------------------------- -@pytest.fixture -def multiindex_year_month_day_dataframe_random_data(): - """ - DataFrame with 3 level MultiIndex (year, month, day) covering - first 100 business days from 2000-01-01 with random data - """ - tdf = DataFrame( - np.random.default_rng(2).standard_normal((100, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=100, freq="B"), - ) - ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() - # use int64 Index, to make sure things work - ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels]) - ymd.index.set_names(["year", "month", "day"], inplace=True) - return ymd - - -@pytest.fixture -def lexsorted_two_level_string_multiindex() -> MultiIndex: - """ - 2-level MultiIndex, lexsorted, with string names. 
- """ - return MultiIndex( - levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], - codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=["first", "second"], - ) - - -@pytest.fixture -def multiindex_dataframe_random_data( - lexsorted_two_level_string_multiindex, -) -> DataFrame: - """DataFrame with 2 level MultiIndex with random data""" - index = lexsorted_two_level_string_multiindex - return DataFrame( - np.random.default_rng(2).standard_normal((10, 3)), - index=index, - columns=Index(["A", "B", "C"], name="exp"), - ) - - -def _create_multiindex(): - """ - MultiIndex used to test the general functionality of this object - """ - - # See Also: tests.multi.conftest.idx - major_axis = Index(["foo", "bar", "baz", "qux"]) - minor_axis = Index(["one", "two"]) - - major_codes = np.array([0, 0, 1, 2, 3, 3]) - minor_codes = np.array([0, 1, 0, 1, 0, 1]) - index_names = ["first", "second"] - return MultiIndex( - levels=[major_axis, minor_axis], - codes=[major_codes, minor_codes], - names=index_names, - verify_integrity=False, - ) - - -def _create_mi_with_dt64tz_level(): - """ - MultiIndex with a level that is a tzaware DatetimeIndex. 
- """ - # GH#8367 round trip with pickle - return MultiIndex.from_product( - [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")], - names=["one", "two", "three"], - ) - - -indices_dict = { - "string": Index([f"pandas_{i}" for i in range(100)]), - "datetime": date_range("2020-01-01", periods=100), - "datetime-tz": date_range("2020-01-01", periods=100, tz="US/Pacific"), - "period": period_range("2020-01-01", periods=100, freq="D"), - "timedelta": timedelta_range(start="1 day", periods=100, freq="D"), - "range": RangeIndex(100), - "int8": Index(np.arange(100), dtype="int8"), - "int16": Index(np.arange(100), dtype="int16"), - "int32": Index(np.arange(100), dtype="int32"), - "int64": Index(np.arange(100), dtype="int64"), - "uint8": Index(np.arange(100), dtype="uint8"), - "uint16": Index(np.arange(100), dtype="uint16"), - "uint32": Index(np.arange(100), dtype="uint32"), - "uint64": Index(np.arange(100), dtype="uint64"), - "float32": Index(np.arange(100), dtype="float32"), - "float64": Index(np.arange(100), dtype="float64"), - "bool-object": Index([True, False] * 5, dtype=object), - "bool-dtype": Index([True, False] * 5, dtype=bool), - "complex64": Index( - np.arange(100, dtype="complex64") + 1.0j * np.arange(100, dtype="complex64") - ), - "complex128": Index( - np.arange(100, dtype="complex128") + 1.0j * np.arange(100, dtype="complex128") - ), - "categorical": CategoricalIndex(list("abcd") * 25), - "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)), - "empty": Index([]), - "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), - "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), - "multi": _create_multiindex(), - "repeats": Index([0, 0, 1, 1, 2, 2]), - "nullable_int": Index(np.arange(100), dtype="Int64"), - "nullable_uint": Index(np.arange(100), dtype="UInt16"), - "nullable_float": Index(np.arange(100), dtype="Float32"), - "nullable_bool": Index(np.arange(100).astype(bool), dtype="boolean"), - "string-python": 
Index( - pd.array([f"pandas_{i}" for i in range(100)], dtype="string[python]") - ), -} -if has_pyarrow: - idx = Index(pd.array([f"pandas_{i}" for i in range(100)], dtype="string[pyarrow]")) - indices_dict["string-pyarrow"] = idx - - -@pytest.fixture(params=indices_dict.keys()) -def index(request): - """ - Fixture for many "simple" kinds of indices. - - These indices are unlikely to cover corner cases, e.g. - - no names - - no NaTs/NaNs - - no values near implementation bounds - - ... - """ - # copy to avoid mutation, e.g. setting .name - return indices_dict[request.param].copy() - - -@pytest.fixture( - params=[ - key for key, value in indices_dict.items() if not isinstance(value, MultiIndex) - ] -) -def index_flat(request): - """ - index fixture, but excluding MultiIndex cases. - """ - key = request.param - return indices_dict[key].copy() - - -@pytest.fixture( - params=[ - key - for key, value in indices_dict.items() - if not ( - key.startswith(("int", "uint", "float")) - or key in ["range", "empty", "repeats", "bool-dtype"] - ) - and not isinstance(value, MultiIndex) - ] -) -def index_with_missing(request): - """ - Fixture for indices with missing values. - - Integer-dtype and empty cases are excluded because they cannot hold missing - values. - - MultiIndex is excluded because isna() is not defined for MultiIndex. - """ - - # GH 35538. 
Use deep copy to avoid illusive bug on np-dev - # GHA pipeline that writes into indices_dict despite copy - ind = indices_dict[request.param].copy(deep=True) - vals = ind.values.copy() - if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: - # For setting missing values in the top level of MultiIndex - vals = ind.tolist() - vals[0] = (None,) + vals[0][1:] - vals[-1] = (None,) + vals[-1][1:] - return MultiIndex.from_tuples(vals) - else: - vals[0] = None - vals[-1] = None - return type(ind)(vals) - - -# ---------------------------------------------------------------- -# Series' -# ---------------------------------------------------------------- -@pytest.fixture -def string_series() -> Series: - """ - Fixture for Series of floats with Index of unique strings - """ - return Series( - np.arange(30, dtype=np.float64) * 1.1, - index=Index([f"i_{i}" for i in range(30)], dtype=object), - name="series", - ) - - -@pytest.fixture -def object_series() -> Series: - """ - Fixture for Series of dtype object with Index of unique strings - """ - data = [f"foo_{i}" for i in range(30)] - index = Index([f"bar_{i}" for i in range(30)], dtype=object) - return Series(data, index=index, name="objects", dtype=object) - - -@pytest.fixture -def datetime_series() -> Series: - """ - Fixture for Series of floats with DatetimeIndex - """ - return Series( - np.random.default_rng(2).standard_normal(30), - index=date_range("2000-01-01", periods=30, freq="B"), - name="ts", - ) - - -def _create_series(index): - """Helper for the _series dict""" - size = len(index) - data = np.random.default_rng(2).standard_normal(size) - return Series(data, index=index, name="a", copy=False) - - -_series = { - f"series-with-{index_id}-index": _create_series(index) - for index_id, index in indices_dict.items() -} - - -@pytest.fixture -def series_with_simple_index(index) -> Series: - """ - Fixture for tests on series with changing types of indices. 
- """ - return _create_series(index) - - -_narrow_series = { - f"{dtype.__name__}-series": Series( - range(30), index=[f"i-{i}" for i in range(30)], name="a", dtype=dtype - ) - for dtype in tm.NARROW_NP_DTYPES -} - - -_index_or_series_objs = {**indices_dict, **_series, **_narrow_series} - - -@pytest.fixture(params=_index_or_series_objs.keys()) -def index_or_series_obj(request): - """ - Fixture for tests on indexes, series and series with a narrow dtype - copy to avoid mutation, e.g. setting .name - """ - return _index_or_series_objs[request.param].copy(deep=True) - - -_typ_objects_series = { - f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES -} - - -_index_or_series_memory_objs = { - **indices_dict, - **_series, - **_narrow_series, - **_typ_objects_series, -} - - -@pytest.fixture(params=_index_or_series_memory_objs.keys()) -def index_or_series_memory_obj(request): - """ - Fixture for tests on indexes, series, series with a narrow dtype and - series with empty objects type - copy to avoid mutation, e.g. setting .name - """ - return _index_or_series_memory_objs[request.param].copy(deep=True) - - -# ---------------------------------------------------------------- -# DataFrames -# ---------------------------------------------------------------- -@pytest.fixture -def int_frame() -> DataFrame: - """ - Fixture for DataFrame of ints with index of unique strings - - Columns are ['A', 'B', 'C', 'D'] - """ - return DataFrame( - np.ones((30, 4), dtype=np.int64), - index=Index([f"foo_{i}" for i in range(30)], dtype=object), - columns=Index(list("ABCD"), dtype=object), - ) - - -@pytest.fixture -def float_frame() -> DataFrame: - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. 
- """ - return DataFrame( - np.random.default_rng(2).standard_normal((30, 4)), - index=Index([f"foo_{i}" for i in range(30)]), - columns=Index(list("ABCD")), - ) - - -@pytest.fixture -def rand_series_with_duplicate_datetimeindex() -> Series: - """ - Fixture for Series with a DatetimeIndex that has duplicates. - """ - dates = [ - datetime(2000, 1, 2), - datetime(2000, 1, 2), - datetime(2000, 1, 2), - datetime(2000, 1, 3), - datetime(2000, 1, 3), - datetime(2000, 1, 3), - datetime(2000, 1, 4), - datetime(2000, 1, 4), - datetime(2000, 1, 4), - datetime(2000, 1, 5), - ] - - return Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates) - - -# ---------------------------------------------------------------- -# Scalars -# ---------------------------------------------------------------- -@pytest.fixture( - params=[ - (Interval(left=0, right=5), IntervalDtype("int64", "right")), - (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")), - (Period("2012-01", freq="M"), "period[M]"), - (Period("2012-02-01", freq="D"), "period[D]"), - ( - Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(unit="s", tz="US/Eastern"), - ), - (Timedelta(seconds=500), "timedelta64[ns]"), - ] -) -def ea_scalar_and_dtype(request): - return request.param - - -# ---------------------------------------------------------------- -# Operators & Operations -# ---------------------------------------------------------------- - - -@pytest.fixture(params=tm.arithmetic_dunder_methods) -def all_arithmetic_operators(request): - """ - Fixture for dunder names for common arithmetic operations. 
- """ - return request.param - - -@pytest.fixture( - params=[ - operator.add, - ops.radd, - operator.sub, - ops.rsub, - operator.mul, - ops.rmul, - operator.truediv, - ops.rtruediv, - operator.floordiv, - ops.rfloordiv, - operator.mod, - ops.rmod, - operator.pow, - ops.rpow, - operator.eq, - operator.ne, - operator.lt, - operator.le, - operator.gt, - operator.ge, - operator.and_, - ops.rand_, - operator.xor, - ops.rxor, - operator.or_, - ops.ror_, - ] -) -def all_binary_operators(request): - """ - Fixture for operator and roperator arithmetic, comparison, and logical ops. - """ - return request.param - - -@pytest.fixture( - params=[ - operator.add, - ops.radd, - operator.sub, - ops.rsub, - operator.mul, - ops.rmul, - operator.truediv, - ops.rtruediv, - operator.floordiv, - ops.rfloordiv, - operator.mod, - ops.rmod, - operator.pow, - ops.rpow, - ] -) -def all_arithmetic_functions(request): - """ - Fixture for operator and roperator arithmetic functions. - - Notes - ----- - This includes divmod and rdivmod, whereas all_arithmetic_operators - does not. - """ - return request.param - - -_all_numeric_reductions = [ - "count", - "sum", - "max", - "min", - "mean", - "prod", - "std", - "var", - "median", - "kurt", - "skew", - "sem", -] - - -@pytest.fixture(params=_all_numeric_reductions) -def all_numeric_reductions(request): - """ - Fixture for numeric reduction names. - """ - return request.param - - -_all_boolean_reductions = ["all", "any"] - - -@pytest.fixture(params=_all_boolean_reductions) -def all_boolean_reductions(request): - """ - Fixture for boolean reduction names. - """ - return request.param - - -_all_reductions = _all_numeric_reductions + _all_boolean_reductions - - -@pytest.fixture(params=_all_reductions) -def all_reductions(request): - """ - Fixture for all (boolean + numeric) reduction names. 
- """ - return request.param - - -@pytest.fixture( - params=[ - operator.eq, - operator.ne, - operator.gt, - operator.ge, - operator.lt, - operator.le, - ] -) -def comparison_op(request): - """ - Fixture for operator module comparison functions. - """ - return request.param - - -@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"]) -def compare_operators_no_eq_ne(request): - """ - Fixture for dunder names for compare operations except == and != - - * >= - * > - * < - * <= - """ - return request.param - - -@pytest.fixture( - params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"] -) -def all_logical_operators(request): - """ - Fixture for dunder names for common logical operations - - * | - * & - * ^ - """ - return request.param - - -_all_numeric_accumulations = ["cumsum", "cumprod", "cummin", "cummax"] - - -@pytest.fixture(params=_all_numeric_accumulations) -def all_numeric_accumulations(request): - """ - Fixture for numeric accumulation names - """ - return request.param - - -# ---------------------------------------------------------------- -# Data sets/files -# ---------------------------------------------------------------- -@pytest.fixture -def strict_data_files(pytestconfig): - """ - Returns the configuration for the test setting `--no-strict-data-files`. - """ - return pytestconfig.getoption("--no-strict-data-files") - - -@pytest.fixture -def datapath(strict_data_files: str) -> Callable[..., str]: - """ - Get the path to a data file. - - Parameters - ---------- - path : str - Path to the file, relative to ``pandas/tests/`` - - Returns - ------- - path including ``pandas/tests``. - - Raises - ------ - ValueError - If the path doesn't exist and the --no-strict-data-files option is not set. 
- """ - BASE_PATH = os.path.join(os.path.dirname(__file__), "tests") - - def deco(*args): - path = os.path.join(BASE_PATH, *args) - if not os.path.exists(path): - if strict_data_files: - raise ValueError( - f"Could not find file {path} and --no-strict-data-files is not set." - ) - pytest.skip(f"Could not find {path}.") - return path - - return deco - - -# ---------------------------------------------------------------- -# Time zones -# ---------------------------------------------------------------- -TIMEZONES = [ - None, - "UTC", - "US/Eastern", - "Asia/Tokyo", - "dateutil/US/Pacific", - "dateutil/Asia/Singapore", - "+01:15", - "-02:15", - "UTC+01:15", - "UTC-02:15", - tzutc(), - tzlocal(), - FixedOffset(300), - FixedOffset(0), - FixedOffset(-300), - timezone.utc, - timezone(timedelta(hours=1)), - timezone(timedelta(hours=-1), name="foo"), -] -if zoneinfo is not None: - TIMEZONES.extend( - [ - zoneinfo.ZoneInfo("US/Pacific"), # type: ignore[list-item] - zoneinfo.ZoneInfo("UTC"), # type: ignore[list-item] - ] - ) -TIMEZONE_IDS = [repr(i) for i in TIMEZONES] - - -@td.parametrize_fixture_doc(str(TIMEZONE_IDS)) -@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS) -def tz_naive_fixture(request): - """ - Fixture for trying timezones including default (None): {0} - """ - return request.param - - -@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:])) -@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:]) -def tz_aware_fixture(request): - """ - Fixture for trying explicit timezones: {0} - """ - return request.param - - -_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc] -if zoneinfo is not None: - _UTCS.append(zoneinfo.ZoneInfo("UTC")) - - -@pytest.fixture(params=_UTCS) -def utc_fixture(request): - """ - Fixture to provide variants of UTC timezone strings and tzinfo objects. - """ - return request.param - - -utc_fixture2 = utc_fixture - - -@pytest.fixture(params=["s", "ms", "us", "ns"]) -def unit(request): - """ - datetime64 units we support. 
- """ - return request.param - - -unit2 = unit - - -# ---------------------------------------------------------------- -# Dtypes -# ---------------------------------------------------------------- -@pytest.fixture(params=tm.STRING_DTYPES) -def string_dtype(request): - """ - Parametrized fixture for string dtypes. - - * str - * 'str' - * 'U' - """ - return request.param - - -@pytest.fixture( - params=[ - "string[python]", - pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")), - ] -) -def nullable_string_dtype(request): - """ - Parametrized fixture for string dtypes. - - * 'string[python]' - * 'string[pyarrow]' - """ - return request.param - - -@pytest.fixture( - params=[ - "python", - pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")), - pytest.param("pyarrow_numpy", marks=td.skip_if_no("pyarrow")), - ] -) -def string_storage(request): - """ - Parametrized fixture for pd.options.mode.string_storage. - - * 'python' - * 'pyarrow' - * 'pyarrow_numpy' - """ - return request.param - - -@pytest.fixture( - params=[ - "numpy_nullable", - pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")), - ] -) -def dtype_backend(request): - """ - Parametrized fixture for pd.options.mode.string_storage. - - * 'python' - * 'pyarrow' - """ - return request.param - - -# Alias so we can test with cartesian product of string_storage -string_storage2 = string_storage - - -@pytest.fixture(params=tm.BYTES_DTYPES) -def bytes_dtype(request): - """ - Parametrized fixture for bytes dtypes. - - * bytes - * 'bytes' - """ - return request.param - - -@pytest.fixture(params=tm.OBJECT_DTYPES) -def object_dtype(request): - """ - Parametrized fixture for object dtypes. 
- - * object - * 'object' - """ - return request.param - - -@pytest.fixture( - params=[ - "object", - "string[python]", - pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")), - pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")), - ] -) -def any_string_dtype(request): - """ - Parametrized fixture for string dtypes. - * 'object' - * 'string[python]' - * 'string[pyarrow]' - """ - return request.param - - -@pytest.fixture(params=tm.DATETIME64_DTYPES) -def datetime64_dtype(request): - """ - Parametrized fixture for datetime64 dtypes. - - * 'datetime64[ns]' - * 'M8[ns]' - """ - return request.param - - -@pytest.fixture(params=tm.TIMEDELTA64_DTYPES) -def timedelta64_dtype(request): - """ - Parametrized fixture for timedelta64 dtypes. - - * 'timedelta64[ns]' - * 'm8[ns]' - """ - return request.param - - -@pytest.fixture -def fixed_now_ts() -> Timestamp: - """ - Fixture emits fixed Timestamp.now() - """ - return Timestamp( # pyright: ignore[reportReturnType] - year=2021, month=1, day=1, hour=12, minute=4, second=13, microsecond=22 - ) - - -@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES) -def float_numpy_dtype(request): - """ - Parameterized fixture for float dtypes. - - * float - * 'float32' - * 'float64' - """ - return request.param - - -@pytest.fixture(params=tm.FLOAT_EA_DTYPES) -def float_ea_dtype(request): - """ - Parameterized fixture for float dtypes. - - * 'Float32' - * 'Float64' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_FLOAT_DTYPES) -def any_float_dtype(request): - """ - Parameterized fixture for float dtypes. - - * float - * 'float32' - * 'float64' - * 'Float32' - * 'Float64' - """ - return request.param - - -@pytest.fixture(params=tm.COMPLEX_DTYPES) -def complex_dtype(request): - """ - Parameterized fixture for complex dtypes. 
- - * complex - * 'complex64' - * 'complex128' - """ - return request.param - - -@pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES) -def any_signed_int_numpy_dtype(request): - """ - Parameterized fixture for signed integer dtypes. - - * int - * 'int8' - * 'int16' - * 'int32' - * 'int64' - """ - return request.param - - -@pytest.fixture(params=tm.UNSIGNED_INT_NUMPY_DTYPES) -def any_unsigned_int_numpy_dtype(request): - """ - Parameterized fixture for unsigned integer dtypes. - - * 'uint8' - * 'uint16' - * 'uint32' - * 'uint64' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES) -def any_int_numpy_dtype(request): - """ - Parameterized fixture for any integer dtype. - - * int - * 'int8' - * 'uint8' - * 'int16' - * 'uint16' - * 'int32' - * 'uint32' - * 'int64' - * 'uint64' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_INT_EA_DTYPES) -def any_int_ea_dtype(request): - """ - Parameterized fixture for any nullable integer dtype. - - * 'UInt8' - * 'Int8' - * 'UInt16' - * 'Int16' - * 'UInt32' - * 'Int32' - * 'UInt64' - * 'Int64' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_INT_DTYPES) -def any_int_dtype(request): - """ - Parameterized fixture for any nullable integer dtype. - - * int - * 'int8' - * 'uint8' - * 'int16' - * 'uint16' - * 'int32' - * 'uint32' - * 'int64' - * 'uint64' - * 'UInt8' - * 'Int8' - * 'UInt16' - * 'Int16' - * 'UInt32' - * 'Int32' - * 'UInt64' - * 'Int64' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES) -def any_numeric_ea_dtype(request): - """ - Parameterized fixture for any nullable integer dtype and - any float ea dtypes. 
- - * 'UInt8' - * 'Int8' - * 'UInt16' - * 'Int16' - * 'UInt32' - * 'Int32' - * 'UInt64' - * 'Int64' - * 'Float32' - * 'Float64' - """ - return request.param - - -# Unsupported operand types for + ("List[Union[str, ExtensionDtype, dtype[Any], -# Type[object]]]" and "List[str]") -@pytest.fixture( - params=tm.ALL_INT_EA_DTYPES - + tm.FLOAT_EA_DTYPES - + tm.ALL_INT_PYARROW_DTYPES_STR_REPR - + tm.FLOAT_PYARROW_DTYPES_STR_REPR # type: ignore[operator] -) -def any_numeric_ea_and_arrow_dtype(request): - """ - Parameterized fixture for any nullable integer dtype and - any float ea dtypes. - - * 'UInt8' - * 'Int8' - * 'UInt16' - * 'Int16' - * 'UInt32' - * 'Int32' - * 'UInt64' - * 'Int64' - * 'Float32' - * 'Float64' - * 'uint8[pyarrow]' - * 'int8[pyarrow]' - * 'uint16[pyarrow]' - * 'int16[pyarrow]' - * 'uint32[pyarrow]' - * 'int32[pyarrow]' - * 'uint64[pyarrow]' - * 'int64[pyarrow]' - * 'float32[pyarrow]' - * 'float64[pyarrow]' - """ - return request.param - - -@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES) -def any_signed_int_ea_dtype(request): - """ - Parameterized fixture for any signed nullable integer dtype. - - * 'Int8' - * 'Int16' - * 'Int32' - * 'Int64' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_REAL_NUMPY_DTYPES) -def any_real_numpy_dtype(request): - """ - Parameterized fixture for any (purely) real numeric dtype. - - * int - * 'int8' - * 'uint8' - * 'int16' - * 'uint16' - * 'int32' - * 'uint32' - * 'int64' - * 'uint64' - * float - * 'float32' - * 'float64' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_REAL_DTYPES) -def any_real_numeric_dtype(request): - """ - Parameterized fixture for any (purely) real numeric dtype. - - * int - * 'int8' - * 'uint8' - * 'int16' - * 'uint16' - * 'int32' - * 'uint32' - * 'int64' - * 'uint64' - * float - * 'float32' - * 'float64' - - and associated ea dtypes. 
- """ - return request.param - - -@pytest.fixture(params=tm.ALL_NUMPY_DTYPES) -def any_numpy_dtype(request): - """ - Parameterized fixture for all numpy dtypes. - - * bool - * 'bool' - * int - * 'int8' - * 'uint8' - * 'int16' - * 'uint16' - * 'int32' - * 'uint32' - * 'int64' - * 'uint64' - * float - * 'float32' - * 'float64' - * complex - * 'complex64' - * 'complex128' - * str - * 'str' - * 'U' - * bytes - * 'bytes' - * 'datetime64[ns]' - * 'M8[ns]' - * 'timedelta64[ns]' - * 'm8[ns]' - * object - * 'object' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_REAL_NULLABLE_DTYPES) -def any_real_nullable_dtype(request): - """ - Parameterized fixture for all real dtypes that can hold NA. - - * float - * 'float32' - * 'float64' - * 'Float32' - * 'Float64' - * 'UInt8' - * 'UInt16' - * 'UInt32' - * 'UInt64' - * 'Int8' - * 'Int16' - * 'Int32' - * 'Int64' - * 'uint8[pyarrow]' - * 'uint16[pyarrow]' - * 'uint32[pyarrow]' - * 'uint64[pyarrow]' - * 'int8[pyarrow]' - * 'int16[pyarrow]' - * 'int32[pyarrow]' - * 'int64[pyarrow]' - * 'float[pyarrow]' - * 'double[pyarrow]' - """ - return request.param - - -@pytest.fixture(params=tm.ALL_NUMERIC_DTYPES) -def any_numeric_dtype(request): - """ - Parameterized fixture for all numeric dtypes. 
- - * int - * 'int8' - * 'uint8' - * 'int16' - * 'uint16' - * 'int32' - * 'uint32' - * 'int64' - * 'uint64' - * float - * 'float32' - * 'float64' - * complex - * 'complex64' - * 'complex128' - * 'UInt8' - * 'Int8' - * 'UInt16' - * 'Int16' - * 'UInt32' - * 'Int32' - * 'UInt64' - * 'Int64' - * 'Float32' - * 'Float64' - """ - return request.param - - -# categoricals are handled separately -_any_skipna_inferred_dtype = [ - ("string", ["a", np.nan, "c"]), - ("string", ["a", pd.NA, "c"]), - ("mixed", ["a", pd.NaT, "c"]), # pd.NaT not considered valid by is_string_array - ("bytes", [b"a", np.nan, b"c"]), - ("empty", [np.nan, np.nan, np.nan]), - ("empty", []), - ("mixed-integer", ["a", np.nan, 2]), - ("mixed", ["a", np.nan, 2.0]), - ("floating", [1.0, np.nan, 2.0]), - ("integer", [1, np.nan, 2]), - ("mixed-integer-float", [1, np.nan, 2.0]), - ("decimal", [Decimal(1), np.nan, Decimal(2)]), - ("boolean", [True, np.nan, False]), - ("boolean", [True, pd.NA, False]), - ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), - ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]), - ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), - ("complex", [1 + 1j, np.nan, 2 + 2j]), - # The following dtype is commented out due to GH 23554 - # ('timedelta64', [np.timedelta64(1, 'D'), - # np.nan, np.timedelta64(2, 'D')]), - ("timedelta", [timedelta(1), np.nan, timedelta(2)]), - ("time", [time(1), np.nan, time(2)]), - ("period", [Period(2013), pd.NaT, Period(2018)]), - ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]), -] -ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id - - -@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids) -def any_skipna_inferred_dtype(request): - """ - Fixture for all inferred dtypes from _libs.lib.infer_dtype - - The covered (inferred) types are: - * 'string' - * 'empty' - * 'bytes' - * 'mixed' - * 'mixed-integer' - * 'mixed-integer-float' - * 'floating' - * 'integer' - * 'decimal' 
- * 'boolean' - * 'datetime64' - * 'datetime' - * 'date' - * 'timedelta' - * 'time' - * 'period' - * 'interval' - - Returns - ------- - inferred_dtype : str - The string for the inferred dtype from _libs.lib.infer_dtype - values : np.ndarray - An array of object dtype that will be inferred to have - `inferred_dtype` - - Examples - -------- - >>> from pandas._libs import lib - >>> - >>> def test_something(any_skipna_inferred_dtype): - ... inferred_dtype, values = any_skipna_inferred_dtype - ... # will pass - ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype - """ - inferred_dtype, values = request.param - values = np.array(values, dtype=object) # object dtype to avoid casting - - # correctness of inference tested in tests/dtypes/test_inference.py - return inferred_dtype, values - - -# ---------------------------------------------------------------- -# Misc -# ---------------------------------------------------------------- -@pytest.fixture -def ip(): - """ - Get an instance of IPython.InteractiveShell. - - Will raise a skip if IPython is not installed. - """ - pytest.importorskip("IPython", minversion="6.0.0") - from IPython.core.interactiveshell import InteractiveShell - - # GH#35711 make sure sqlite history file handle is not leaked - from traitlets.config import Config # isort:skip - - c = Config() - c.HistoryManager.hist_file = ":memory:" - - return InteractiveShell(config=c) - - -@pytest.fixture -def mpl_cleanup(): - """ - Ensure Matplotlib is cleaned up around a test. - - Before a test is run: - - 1) Set the backend to "template" to avoid requiring a GUI. - - After a test is run: - - 1) Reset units registry - 2) Reset rc_context - 3) Close all figures - - See matplotlib/testing/decorators.py#L24. 
- """ - mpl = pytest.importorskip("matplotlib") - mpl_units = pytest.importorskip("matplotlib.units") - plt = pytest.importorskip("matplotlib.pyplot") - orig_units_registry = mpl_units.registry.copy() - try: - with mpl.rc_context(): - mpl.use("template") - yield - finally: - mpl_units.registry.clear() - mpl_units.registry.update(orig_units_registry) - plt.close("all") - # https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close # noqa: E501 - gc.collect(1) - - -@pytest.fixture( - params=[ - getattr(pd.offsets, o) - for o in pd.offsets.__all__ - if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) and o != "Tick" - ] -) -def tick_classes(request): - """ - Fixture for Tick based datetime offsets available for a time series. - """ - return request.param - - -@pytest.fixture(params=[None, lambda x: x]) -def sort_by_key(request): - """ - Simple fixture for testing keys in sorting methods. - Tests None (no key) and the identity key. - """ - return request.param - - -@pytest.fixture( - params=[ - ("foo", None, None), - ("Egon", "Venkman", None), - ("NCC1701D", "NCC1701D", "NCC1701D"), - # possibly-matching NAs - (np.nan, np.nan, np.nan), - (np.nan, pd.NaT, None), - (np.nan, pd.NA, None), - (pd.NA, pd.NA, pd.NA), - ] -) -def names(request) -> tuple[Hashable, Hashable, Hashable]: - """ - A 3-tuple of names, the first two for operands, the last for a result. 
- """ - return request.param - - -@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc]) -def indexer_sli(request): - """ - Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__ - """ - return request.param - - -@pytest.fixture(params=[tm.loc, tm.iloc]) -def indexer_li(request): - """ - Parametrize over loc.__getitem__, iloc.__getitem__ - """ - return request.param - - -@pytest.fixture(params=[tm.setitem, tm.iloc]) -def indexer_si(request): - """ - Parametrize over __setitem__, iloc.__setitem__ - """ - return request.param - - -@pytest.fixture(params=[tm.setitem, tm.loc]) -def indexer_sl(request): - """ - Parametrize over __setitem__, loc.__setitem__ - """ - return request.param - - -@pytest.fixture(params=[tm.at, tm.loc]) -def indexer_al(request): - """ - Parametrize over at.__setitem__, loc.__setitem__ - """ - return request.param - - -@pytest.fixture(params=[tm.iat, tm.iloc]) -def indexer_ial(request): - """ - Parametrize over iat.__setitem__, iloc.__setitem__ - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def performance_warning(request) -> Iterator[bool | type[Warning]]: - """ - Fixture to check if performance warnings are enabled. Either produces - ``PerformanceWarning`` if they are enabled, otherwise ``False``. - """ - with pd.option_context("mode.performance_warnings", request.param): - yield pd.errors.PerformanceWarning if request.param else False - - -@pytest.fixture -def using_infer_string() -> bool: - """ - Fixture to check if infer string option is enabled. - """ - return pd.options.future.infer_string is True - - -warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] -if zoneinfo is not None: - warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw")) # type: ignore[arg-type] - - -@pytest.fixture(params=warsaws) -def warsaw(request) -> str: - """ - tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo. 
- """ - return request.param - - -@pytest.fixture -def arrow_string_storage(): - """ - Fixture that lists possible PyArrow values for StringDtype storage field. - """ - return ("pyarrow", "pyarrow_numpy") - - -@pytest.fixture -def temp_file(tmp_path): - """ - Generate a unique file for testing use. See link for removal policy. - https://docs.pytest.org/en/7.1.x/how-to/tmp_path.html#the-default-base-temporary-directory - """ - file_path = tmp_path / str(uuid.uuid4()) - file_path.touch() - return file_path diff --git a/pandas/meson.build b/pandas/meson.build index 435103a954d86..a6ca8b68e0460 100644 --- a/pandas/meson.build +++ b/pandas/meson.build @@ -34,10 +34,18 @@ subdirs_list = [ 'errors', 'io', 'plotting', - 'tests', 'tseries', 'util' ] + +# Add the tests in if they are present +# (they would be present normally but +# would not be present in an sdist since +# we strip them out for build size reasons) +if fs.is_dir('tests') + subdirs_list += 'tests' +endif + foreach subdir: subdirs_list install_subdir(subdir, install_dir: py.get_install_dir() / 'pandas') endforeach diff --git a/pandas/pyproject.toml b/pandas/pyproject.toml new file mode 100644 index 0000000000000..1eb3794b44105 --- /dev/null +++ b/pandas/pyproject.toml @@ -0,0 +1,34 @@ +# Note: This is the pyproject.toml for the pandas-tests package +# The pyproject.toml for the pandas package is in the parent directory +# of this one +[build-system] +requires = [ + "setuptools>=61.0.0", +] +[project] +name="pandas-tests" +# Note: The version is hardcoded since the pandas version has to +# match the tests version. Remember to bump both! +dynamic = ["version"] +#dependencies=[ +# "pandas==2.1.0" +#] +requires-python=">=3.9" + +[tool.setuptools] +package-dir={"pandas_tests"= "tests"} + +[tool.setuptools_scm] +root = ".." 
+ +[tool.setuptools.package-data] +"pandas_tests"=["pytest.ini"] + +# Only present to specify options for the doctests +[tool.pytest.ini_options] +minversion = "7.3.2" +doctest_optionflags = [ + "NORMALIZE_WHITESPACE", + "IGNORE_EXCEPTION_DETAIL", + "ELLIPSIS", +] diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py new file mode 100644 index 0000000000000..3b1656c41e14a --- /dev/null +++ b/pandas/tests/conftest.py @@ -0,0 +1,1955 @@ +""" +This file is very long and growing, but it was decided to not split it yet, as +it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989 + +Instead of splitting it was decided to define sections here: +- Configuration / Settings +- Autouse fixtures +- Common arguments +- Missing values & co. +- Classes +- Indices +- Series' +- DataFrames +- Operators & Operations +- Data sets/files +- Time zones +- Dtypes +- Misc +""" + +from __future__ import annotations + +from collections import abc +from datetime import ( + date, + datetime, + time, + timedelta, + timezone, +) +from decimal import Decimal +import gc +import operator +import os +from typing import ( + TYPE_CHECKING, + Callable, +) +import uuid + +from dateutil.tz import ( + tzlocal, + tzutc, +) +import hypothesis +from hypothesis import strategies as st +import numpy as np +import pytest +from pytz import ( + FixedOffset, + utc, +) + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, +) + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + Interval, + IntervalIndex, + Period, + RangeIndex, + Series, + Timedelta, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core import ops +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) +from pandas.util.version import Version + +if TYPE_CHECKING: + from collections.abc import ( + Hashable, + Iterator, + ) + +try: + import pyarrow as pa +except 
ImportError: + has_pyarrow = False +else: + del pa + has_pyarrow = True + +import zoneinfo + +try: + zoneinfo.ZoneInfo("UTC") +except zoneinfo.ZoneInfoNotFoundError: + zoneinfo = None # type: ignore[assignment] + + +# ---------------------------------------------------------------- +# Configuration / Settings +# ---------------------------------------------------------------- +# pytest + + +def pytest_addoption(parser) -> None: + parser.addoption( + "--no-strict-data-files", + action="store_false", + help="Don't fail if a test is skipped for missing data file.", + ) + + +hypothesis_health_checks = [hypothesis.HealthCheck.too_slow] +if Version(hypothesis.__version__) >= Version("6.83.2"): + hypothesis_health_checks.append(hypothesis.HealthCheck.differing_executors) + +# Hypothesis +hypothesis.settings.register_profile( + "ci", + # Hypothesis timing checks are tuned for scalars by default, so we bump + # them from 200ms to 500ms per test case as the global default. If this + # is too short for a specific test, (a) try to make it faster, and (b) + # if it really is slow add `@settings(deadline=...)` with a working value, + # or `deadline=None` to entirely disable timeouts for that test. + # 2022-02-09: Changed deadline from 500 -> None. 
Deadline leads to + # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969) + deadline=None, + suppress_health_check=tuple(hypothesis_health_checks), +) +hypothesis.settings.load_profile("ci") + +# Registering these strategies makes them globally available via st.from_type, +# which is used for offsets in tests/tseries/offsets/test_offsets_properties.py +for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) + ) + +for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-5, 5), + normalize=st.booleans(), + month=st.integers(min_value=1, max_value=12), + ), + ) + +for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-24, 24), + normalize=st.booleans(), + startingMonth=st.integers(min_value=1, max_value=12), + ), + ) + + +# ---------------------------------------------------------------- +# Autouse fixtures +# ---------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def configure_tests() -> None: + """ + Configure settings for all tests and test modules. + """ + pd.set_option("chained_assignment", "raise") + + +# ---------------------------------------------------------------- +# Common arguments +# ---------------------------------------------------------------- +@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={x!r}") +def axis(request): + """ + Fixture for returning the axis numbers of a DataFrame.
+ """ + return request.param + + +@pytest.fixture(params=[True, False]) +def observed(request): + """ + Pass in the observed keyword to groupby for [True, False] + This indicates whether categoricals should return values for + values which are not in the grouper [False / None], or only values which + appear in the grouper [True]. [None] is supported for future compatibility + if we decide to change the default (and would need to warn if this + parameter is not passed). + """ + return request.param + + +@pytest.fixture(params=[True, False, None]) +def ordered(request): + """ + Boolean 'ordered' parameter for Categorical. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def dropna(request): + """ + Boolean 'dropna' parameter. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def sort(request): + """ + Boolean 'sort' parameter. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def skipna(request): + """ + Boolean 'skipna' parameter. + """ + return request.param + + +@pytest.fixture(params=["first", "last", False]) +def keep(request): + """ + Valid values for the 'keep' parameter used in + .duplicated or .drop_duplicates + """ + return request.param + + +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive_endpoints_fixture(request): + """ + Fixture for trying all interval 'inclusive' parameters. + """ + return request.param + + +@pytest.fixture(params=["left", "right", "both", "neither"]) +def closed(request): + """ + Fixture for trying all interval closed parameters. + """ + return request.param + + +@pytest.fixture(params=["left", "right", "both", "neither"]) +def other_closed(request): + """ + Secondary closed fixture to allow parametrizing over all pairs of closed. 
+ """ + return request.param + + +@pytest.fixture( + params=[ + None, + "gzip", + "bz2", + "zip", + "xz", + "tar", + pytest.param("zstd", marks=td.skip_if_no("zstandard")), + ] +) +def compression(request): + """ + Fixture for trying common compression types in compression tests. + """ + return request.param + + +@pytest.fixture( + params=[ + "gzip", + "bz2", + "zip", + "xz", + "tar", + pytest.param("zstd", marks=td.skip_if_no("zstandard")), + ] +) +def compression_only(request): + """ + Fixture for trying common compression types in compression tests excluding + uncompressed case. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def writable(request): + """ + Fixture for whether an array is writable. + """ + return request.param + + +@pytest.fixture(params=["inner", "outer", "left", "right"]) +def join_type(request): + """ + Fixture for trying all types of join operations. + """ + return request.param + + +@pytest.fixture(params=["nlargest", "nsmallest"]) +def nselect_method(request): + """ + Fixture for trying all nselect methods. + """ + return request.param + + +@pytest.fixture(params=[None, "ignore"]) +def na_action(request): + """ + Fixture for 'na_action' argument in map. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def ascending(request): + """ + Fixture for 'ascending' argument in sort_values/sort_index/rank. + """ + return request.param + + +@pytest.fixture(params=["average", "min", "max", "first", "dense"]) +def rank_method(request): + """ + Fixture for 'method' argument in rank. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_index(request): + """ + Fixture for 'as_index' argument in groupby. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def cache(request): + """ + Fixture for 'cache' argument in to_datetime.
+ """ + return request.param + + +@pytest.fixture(params=[True, False]) +def parallel(request): + """ + Fixture for parallel keyword argument for numba.jit. + """ + return request.param + + +# Can parameterize nogil & nopython over True | False, but limiting per +# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472 + + +@pytest.fixture(params=[False]) +def nogil(request): + """ + Fixture for nogil keyword argument for numba.jit. + """ + return request.param + + +@pytest.fixture(params=[True]) +def nopython(request): + """ + Fixture for nopython keyword argument for numba.jit. + """ + return request.param + + +# ---------------------------------------------------------------- +# Missing values & co. +# ---------------------------------------------------------------- +@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__) +def nulls_fixture(request): + """ + Fixture for each null type in pandas. + """ + return request.param + + +nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture + + +@pytest.fixture(params=[None, np.nan, pd.NaT]) +def unique_nulls_fixture(request): + """ + Fixture for each null type in pandas, each null type exactly once. + """ + return request.param + + +# Generate cartesian product of unique_nulls_fixture: +unique_nulls_fixture2 = unique_nulls_fixture + + +@pytest.fixture(params=tm.NP_NAT_OBJECTS, ids=lambda x: type(x).__name__) +def np_nat_fixture(request): + """ + Fixture for each NaT type in numpy. + """ + return request.param + + +# Generate cartesian product of np_nat_fixture: +np_nat_fixture2 = np_nat_fixture + + +# ---------------------------------------------------------------- +# Classes +# ---------------------------------------------------------------- + + +@pytest.fixture(params=[DataFrame, Series]) +def frame_or_series(request): + """ + Fixture to parametrize over DataFrame and Series. 
+ """ + return request.param + + +@pytest.fixture(params=[Index, Series], ids=["index", "series"]) +def index_or_series(request): + """ + Fixture to parametrize over Index and Series, made necessary by a mypy + bug, giving an error: + + List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" + + See GH#29725 + """ + return request.param + + +@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"]) +def index_or_series_or_array(request): + """ + Fixture to parametrize over Index, Series, and ExtensionArray + """ + return request.param + + +@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__) +def box_with_array(request): + """ + Fixture to test behavior for Index, Series, DataFrame, and pandas Array + classes + """ + return request.param + + +box_with_array2 = box_with_array + + +@pytest.fixture +def dict_subclass() -> type[dict]: + """ + Fixture for a dictionary subclass. + """ + + class TestSubDict(dict): + def __init__(self, *args, **kwargs) -> None: + dict.__init__(self, *args, **kwargs) + + return TestSubDict + + +@pytest.fixture +def non_dict_mapping_subclass() -> type[abc.Mapping]: + """ + Fixture for a Mapping subclass that is not a dict subclass.
+ """ + + class TestNonDictMapping(abc.Mapping): + def __init__(self, underlying_dict) -> None: + self._data = underlying_dict + + def __getitem__(self, key): + return self._data.__getitem__(key) + + def __iter__(self) -> Iterator: + return self._data.__iter__() + + def __len__(self) -> int: + return self._data.__len__() + + return TestNonDictMapping + + +# ---------------------------------------------------------------- +# Indices +# ---------------------------------------------------------------- +@pytest.fixture +def multiindex_year_month_day_dataframe_random_data(): + """ + DataFrame with 3 level MultiIndex (year, month, day) covering + first 100 business days from 2000-01-01 with random data + """ + tdf = DataFrame( + np.random.default_rng(2).standard_normal((100, 4)), + columns=Index(list("ABCD"), dtype=object), + index=date_range("2000-01-01", periods=100, freq="B"), + ) + ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() + # use int64 Index, to make sure things work + ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels]) + ymd.index.set_names(["year", "month", "day"], inplace=True) + return ymd + + +@pytest.fixture +def lexsorted_two_level_string_multiindex() -> MultiIndex: + """ + 2-level MultiIndex, lexsorted, with string names. 
+ """ + return MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + + +@pytest.fixture +def multiindex_dataframe_random_data( + lexsorted_two_level_string_multiindex, +) -> DataFrame: + """DataFrame with 2 level MultiIndex with random data""" + index = lexsorted_two_level_string_multiindex + return DataFrame( + np.random.default_rng(2).standard_normal((10, 3)), + index=index, + columns=Index(["A", "B", "C"], name="exp"), + ) + + +def _create_multiindex(): + """ + MultiIndex used to test the general functionality of this object + """ + + # See Also: tests.multi.conftest.idx + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + return MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + + +def _create_mi_with_dt64tz_level(): + """ + MultiIndex with a level that is a tzaware DatetimeIndex. 
+ """ + # GH#8367 round trip with pickle + return MultiIndex.from_product( + [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")], + names=["one", "two", "three"], + ) + + +indices_dict = { + "string": Index([f"pandas_{i}" for i in range(100)]), + "datetime": date_range("2020-01-01", periods=100), + "datetime-tz": date_range("2020-01-01", periods=100, tz="US/Pacific"), + "period": period_range("2020-01-01", periods=100, freq="D"), + "timedelta": timedelta_range(start="1 day", periods=100, freq="D"), + "range": RangeIndex(100), + "int8": Index(np.arange(100), dtype="int8"), + "int16": Index(np.arange(100), dtype="int16"), + "int32": Index(np.arange(100), dtype="int32"), + "int64": Index(np.arange(100), dtype="int64"), + "uint8": Index(np.arange(100), dtype="uint8"), + "uint16": Index(np.arange(100), dtype="uint16"), + "uint32": Index(np.arange(100), dtype="uint32"), + "uint64": Index(np.arange(100), dtype="uint64"), + "float32": Index(np.arange(100), dtype="float32"), + "float64": Index(np.arange(100), dtype="float64"), + "bool-object": Index([True, False] * 5, dtype=object), + "bool-dtype": Index([True, False] * 5, dtype=bool), + "complex64": Index( + np.arange(100, dtype="complex64") + 1.0j * np.arange(100, dtype="complex64") + ), + "complex128": Index( + np.arange(100, dtype="complex128") + 1.0j * np.arange(100, dtype="complex128") + ), + "categorical": CategoricalIndex(list("abcd") * 25), + "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)), + "empty": Index([]), + "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), + "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), + "multi": _create_multiindex(), + "repeats": Index([0, 0, 1, 1, 2, 2]), + "nullable_int": Index(np.arange(100), dtype="Int64"), + "nullable_uint": Index(np.arange(100), dtype="UInt16"), + "nullable_float": Index(np.arange(100), dtype="Float32"), + "nullable_bool": Index(np.arange(100).astype(bool), dtype="boolean"), + "string-python": 
Index( + pd.array([f"pandas_{i}" for i in range(100)], dtype="string[python]") + ), +} +if has_pyarrow: + idx = Index(pd.array([f"pandas_{i}" for i in range(100)], dtype="string[pyarrow]")) + indices_dict["string-pyarrow"] = idx + + +@pytest.fixture(params=indices_dict.keys()) +def index(request): + """ + Fixture for many "simple" kinds of indices. + + These indices are unlikely to cover corner cases, e.g. + - no names + - no NaTs/NaNs + - no values near implementation bounds + - ... + """ + # copy to avoid mutation, e.g. setting .name + return indices_dict[request.param].copy() + + +@pytest.fixture( + params=[ + key for key, value in indices_dict.items() if not isinstance(value, MultiIndex) + ] +) +def index_flat(request): + """ + index fixture, but excluding MultiIndex cases. + """ + key = request.param + return indices_dict[key].copy() + + +@pytest.fixture( + params=[ + key + for key, value in indices_dict.items() + if not ( + key.startswith(("int", "uint", "float")) + or key in ["range", "empty", "repeats", "bool-dtype"] + ) + and not isinstance(value, MultiIndex) + ] +) +def index_with_missing(request): + """ + Fixture for indices with missing values. + + Integer-dtype and empty cases are excluded because they cannot hold missing + values. + + MultiIndex is excluded because isna() is not defined for MultiIndex. + """ + + # GH 35538. 
Use deep copy to avoid elusive bug on np-dev + # GHA pipeline that writes into indices_dict despite copy + ind = indices_dict[request.param].copy(deep=True) + vals = ind.values.copy() + if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: + # For setting missing values in the top level of MultiIndex + vals = ind.tolist() + vals[0] = (None,) + vals[0][1:] + vals[-1] = (None,) + vals[-1][1:] + return MultiIndex.from_tuples(vals) + else: + vals[0] = None + vals[-1] = None + return type(ind)(vals) + + +# ---------------------------------------------------------------- +# Series +# ---------------------------------------------------------------- +@pytest.fixture +def string_series() -> Series: + """ + Fixture for Series of floats with Index of unique strings + """ + return Series( + np.arange(30, dtype=np.float64) * 1.1, + index=Index([f"i_{i}" for i in range(30)], dtype=object), + name="series", + ) + + +@pytest.fixture +def object_series() -> Series: + """ + Fixture for Series of dtype object with Index of unique strings + """ + data = [f"foo_{i}" for i in range(30)] + index = Index([f"bar_{i}" for i in range(30)], dtype=object) + return Series(data, index=index, name="objects", dtype=object) + + +@pytest.fixture +def datetime_series() -> Series: + """ + Fixture for Series of floats with DatetimeIndex + """ + return Series( + np.random.default_rng(2).standard_normal(30), + index=date_range("2000-01-01", periods=30, freq="B"), + name="ts", + ) + + +def _create_series(index): + """Helper for the _series dict""" + size = len(index) + data = np.random.default_rng(2).standard_normal(size) + return Series(data, index=index, name="a", copy=False) + + +_series = { + f"series-with-{index_id}-index": _create_series(index) + for index_id, index in indices_dict.items() +} + + +@pytest.fixture +def series_with_simple_index(index) -> Series: + """ + Fixture for tests on series with changing types of indices.
+ """ + return _create_series(index) + + +_narrow_series = { + f"{dtype.__name__}-series": Series( + range(30), index=[f"i-{i}" for i in range(30)], name="a", dtype=dtype + ) + for dtype in tm.NARROW_NP_DTYPES +} + + +_index_or_series_objs = {**indices_dict, **_series, **_narrow_series} + + +@pytest.fixture(params=_index_or_series_objs.keys()) +def index_or_series_obj(request): + """ + Fixture for tests on indexes, series and series with a narrow dtype + copy to avoid mutation, e.g. setting .name + """ + return _index_or_series_objs[request.param].copy(deep=True) + + +_typ_objects_series = { + f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES +} + + +_index_or_series_memory_objs = { + **indices_dict, + **_series, + **_narrow_series, + **_typ_objects_series, +} + + +@pytest.fixture(params=_index_or_series_memory_objs.keys()) +def index_or_series_memory_obj(request): + """ + Fixture for tests on indexes, series, series with a narrow dtype and + series with empty objects type + copy to avoid mutation, e.g. setting .name + """ + return _index_or_series_memory_objs[request.param].copy(deep=True) + + +# ---------------------------------------------------------------- +# DataFrames +# ---------------------------------------------------------------- +@pytest.fixture +def int_frame() -> DataFrame: + """ + Fixture for DataFrame of ints with index of unique strings + + Columns are ['A', 'B', 'C', 'D'] + """ + return DataFrame( + np.ones((30, 4), dtype=np.int64), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) + + +@pytest.fixture +def float_frame() -> DataFrame: + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. 
+ """ + return DataFrame( + np.random.default_rng(2).standard_normal((30, 4)), + index=Index([f"foo_{i}" for i in range(30)]), + columns=Index(list("ABCD")), + ) + + +@pytest.fixture +def rand_series_with_duplicate_datetimeindex() -> Series: + """ + Fixture for Series with a DatetimeIndex that has duplicates. + """ + dates = [ + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + ] + + return Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates) + + +# ---------------------------------------------------------------- +# Scalars +# ---------------------------------------------------------------- +@pytest.fixture( + params=[ + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")), + (Period("2012-01", freq="M"), "period[M]"), + (Period("2012-02-01", freq="D"), "period[D]"), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(unit="s", tz="US/Eastern"), + ), + (Timedelta(seconds=500), "timedelta64[ns]"), + ] +) +def ea_scalar_and_dtype(request): + return request.param + + +# ---------------------------------------------------------------- +# Operators & Operations +# ---------------------------------------------------------------- + + +@pytest.fixture(params=tm.arithmetic_dunder_methods) +def all_arithmetic_operators(request): + """ + Fixture for dunder names for common arithmetic operations. 
+ """ + return request.param + + +@pytest.fixture( + params=[ + operator.add, + ops.radd, + operator.sub, + ops.rsub, + operator.mul, + ops.rmul, + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + operator.pow, + ops.rpow, + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge, + operator.and_, + ops.rand_, + operator.xor, + ops.rxor, + operator.or_, + ops.ror_, + ] +) +def all_binary_operators(request): + """ + Fixture for operator and roperator arithmetic, comparison, and logical ops. + """ + return request.param + + +@pytest.fixture( + params=[ + operator.add, + ops.radd, + operator.sub, + ops.rsub, + operator.mul, + ops.rmul, + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + operator.pow, + ops.rpow, + ] +) +def all_arithmetic_functions(request): + """ + Fixture for operator and roperator arithmetic functions. + + Notes + ----- + This does not include divmod and rdivmod; only the functions listed in + ``params`` above are covered. + """ + return request.param + + +_all_numeric_reductions = [ + "count", + "sum", + "max", + "min", + "mean", + "prod", + "std", + "var", + "median", + "kurt", + "skew", + "sem", +] + + +@pytest.fixture(params=_all_numeric_reductions) +def all_numeric_reductions(request): + """ + Fixture for numeric reduction names. + """ + return request.param + + +_all_boolean_reductions = ["all", "any"] + + +@pytest.fixture(params=_all_boolean_reductions) +def all_boolean_reductions(request): + """ + Fixture for boolean reduction names. + """ + return request.param + + +_all_reductions = _all_numeric_reductions + _all_boolean_reductions + + +@pytest.fixture(params=_all_reductions) +def all_reductions(request): + """ + Fixture for all (boolean + numeric) reduction names.
+ """ + return request.param + + +@pytest.fixture( + params=[ + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ] +) +def comparison_op(request): + """ + Fixture for operator module comparison functions. + """ + return request.param + + +@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"]) +def compare_operators_no_eq_ne(request): + """ + Fixture for dunder names for compare operations except == and != + + * >= + * > + * < + * <= + """ + return request.param + + +@pytest.fixture( + params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"] +) +def all_logical_operators(request): + """ + Fixture for dunder names for common logical operations + + * | + * & + * ^ + """ + return request.param + + +_all_numeric_accumulations = ["cumsum", "cumprod", "cummin", "cummax"] + + +@pytest.fixture(params=_all_numeric_accumulations) +def all_numeric_accumulations(request): + """ + Fixture for numeric accumulation names + """ + return request.param + + +# ---------------------------------------------------------------- +# Data sets/files +# ---------------------------------------------------------------- +@pytest.fixture +def strict_data_files(pytestconfig): + """ + Returns the configuration for the test setting `--no-strict-data-files`. + """ + return pytestconfig.getoption("--no-strict-data-files") + + +@pytest.fixture +def datapath(strict_data_files: str) -> Callable[..., str]: + """ + Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --no-strict-data-files option is not set. 
+ """ + BASE_PATH = os.path.join(os.path.dirname(__file__), "") + + def deco(*args): + path = os.path.join(BASE_PATH, *args) + if not os.path.exists(path): + if strict_data_files: + raise ValueError( + f"Could not find file {path} and --no-strict-data-files is not set." + ) + pytest.skip(f"Could not find {path}.") + return path + + return deco + + +# ---------------------------------------------------------------- +# Time zones +# ---------------------------------------------------------------- +TIMEZONES = [ + None, + "UTC", + "US/Eastern", + "Asia/Tokyo", + "dateutil/US/Pacific", + "dateutil/Asia/Singapore", + "+01:15", + "-02:15", + "UTC+01:15", + "UTC-02:15", + tzutc(), + tzlocal(), + FixedOffset(300), + FixedOffset(0), + FixedOffset(-300), + timezone.utc, + timezone(timedelta(hours=1)), + timezone(timedelta(hours=-1), name="foo"), +] +if zoneinfo is not None: + TIMEZONES.extend( + [ + zoneinfo.ZoneInfo("US/Pacific"), # type: ignore[list-item] + zoneinfo.ZoneInfo("UTC"), # type: ignore[list-item] + ] + ) +TIMEZONE_IDS = [repr(i) for i in TIMEZONES] + + +@td.parametrize_fixture_doc(str(TIMEZONE_IDS)) +@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS) +def tz_naive_fixture(request): + """ + Fixture for trying timezones including default (None): {0} + """ + return request.param + + +@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:])) +@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:]) +def tz_aware_fixture(request): + """ + Fixture for trying explicit timezones: {0} + """ + return request.param + + +# Generate cartesian product of tz_aware_fixture: +tz_aware_fixture2 = tz_aware_fixture + + +_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc] +if zoneinfo is not None: + _UTCS.append(zoneinfo.ZoneInfo("UTC")) + + +@pytest.fixture(params=_UTCS) +def utc_fixture(request): + """ + Fixture to provide variants of UTC timezone strings and tzinfo objects. 
+ """ + return request.param + + +utc_fixture2 = utc_fixture + + +@pytest.fixture(params=["s", "ms", "us", "ns"]) +def unit(request): + """ + datetime64 units we support. + """ + return request.param + + +unit2 = unit + + +# ---------------------------------------------------------------- +# Dtypes +# ---------------------------------------------------------------- +@pytest.fixture(params=tm.STRING_DTYPES) +def string_dtype(request): + """ + Parametrized fixture for string dtypes. + + * str + * 'str' + * 'U' + """ + return request.param + + +@pytest.fixture( + params=[ + "string[python]", + pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")), + ] +) +def nullable_string_dtype(request): + """ + Parametrized fixture for nullable string dtypes. + + * 'string[python]' + * 'string[pyarrow]' + """ + return request.param + + +@pytest.fixture( + params=[ + "python", + pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")), + pytest.param("pyarrow_numpy", marks=td.skip_if_no("pyarrow")), + ] +) +def string_storage(request): + """ + Parametrized fixture for pd.options.mode.string_storage. + + * 'python' + * 'pyarrow' + * 'pyarrow_numpy' + """ + return request.param + + +@pytest.fixture( + params=[ + "numpy_nullable", + pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")), + ] +) +def dtype_backend(request): + """ + Parametrized fixture for the 'dtype_backend' keyword argument. + + * 'numpy_nullable' + * 'pyarrow' + """ + return request.param + + +# Alias so we can test with cartesian product of string_storage +string_storage2 = string_storage + + +@pytest.fixture(params=tm.BYTES_DTYPES) +def bytes_dtype(request): + """ + Parametrized fixture for bytes dtypes. + + * bytes + * 'bytes' + """ + return request.param + + +@pytest.fixture(params=tm.OBJECT_DTYPES) +def object_dtype(request): + """ + Parametrized fixture for object dtypes.
+ + * object + * 'object' + """ + return request.param + + +@pytest.fixture( + params=[ + "object", + "string[python]", + pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")), + pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")), + ] +) +def any_string_dtype(request): + """ + Parametrized fixture for string dtypes. + * 'object' + * 'string[python]' + * 'string[pyarrow]' + """ + return request.param + + +@pytest.fixture(params=tm.DATETIME64_DTYPES) +def datetime64_dtype(request): + """ + Parametrized fixture for datetime64 dtypes. + + * 'datetime64[ns]' + * 'M8[ns]' + """ + return request.param + + +@pytest.fixture(params=tm.TIMEDELTA64_DTYPES) +def timedelta64_dtype(request): + """ + Parametrized fixture for timedelta64 dtypes. + + * 'timedelta64[ns]' + * 'm8[ns]' + """ + return request.param + + +@pytest.fixture +def fixed_now_ts() -> Timestamp: + """ + Fixture emits fixed Timestamp.now() + """ + return Timestamp( # pyright: ignore[reportReturnType] + year=2021, month=1, day=1, hour=12, minute=4, second=13, microsecond=22 + ) + + +@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES) +def float_numpy_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=tm.FLOAT_EA_DTYPES) +def float_ea_dtype(request): + """ + Parameterized fixture for float dtypes. + + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_FLOAT_DTYPES) +def any_float_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.COMPLEX_DTYPES) +def complex_dtype(request): + """ + Parameterized fixture for complex dtypes. 
+ + * complex + * 'complex64' + * 'complex128' + """ + return request.param + + +@pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES) +def any_signed_int_numpy_dtype(request): + """ + Parameterized fixture for signed integer dtypes. + + * int + * 'int8' + * 'int16' + * 'int32' + * 'int64' + """ + return request.param + + +@pytest.fixture(params=tm.UNSIGNED_INT_NUMPY_DTYPES) +def any_unsigned_int_numpy_dtype(request): + """ + Parameterized fixture for unsigned integer dtypes. + + * 'uint8' + * 'uint16' + * 'uint32' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES) +def any_int_numpy_dtype(request): + """ + Parameterized fixture for any NumPy integer dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_EA_DTYPES) +def any_int_ea_dtype(request): + """ + Parameterized fixture for any nullable integer dtype. + + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_DTYPES) +def any_int_dtype(request): + """ + Parameterized fixture for any integer dtype, NumPy or nullable. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES) +def any_numeric_ea_dtype(request): + """ + Parameterized fixture for any nullable integer dtype and + any float ea dtypes.
+ + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + * 'Float32' + * 'Float64' + """ + return request.param + + +# Unsupported operand types for + ("List[Union[str, ExtensionDtype, dtype[Any], +# Type[object]]]" and "List[str]") +@pytest.fixture( + params=tm.ALL_INT_EA_DTYPES + + tm.FLOAT_EA_DTYPES + + tm.ALL_INT_PYARROW_DTYPES_STR_REPR + + tm.FLOAT_PYARROW_DTYPES_STR_REPR # type: ignore[operator] +) +def any_numeric_ea_and_arrow_dtype(request): + """ + Parameterized fixture for any nullable integer dtype and + any float ea dtypes. + + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + * 'Float32' + * 'Float64' + * 'uint8[pyarrow]' + * 'int8[pyarrow]' + * 'uint16[pyarrow]' + * 'int16[pyarrow]' + * 'uint32[pyarrow]' + * 'int32[pyarrow]' + * 'uint64[pyarrow]' + * 'int64[pyarrow]' + * 'float32[pyarrow]' + * 'float64[pyarrow]' + """ + return request.param + + +@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES) +def any_signed_int_ea_dtype(request): + """ + Parameterized fixture for any signed nullable integer dtype. + + * 'Int8' + * 'Int16' + * 'Int32' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_REAL_NUMPY_DTYPES) +def any_real_numpy_dtype(request): + """ + Parameterized fixture for any (purely) real numeric dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_REAL_DTYPES) +def any_real_numeric_dtype(request): + """ + Parameterized fixture for any (purely) real numeric dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + + and associated ea dtypes. 
+ """ + return request.param + + +@pytest.fixture(params=tm.ALL_NUMPY_DTYPES) +def any_numpy_dtype(request): + """ + Parameterized fixture for all numpy dtypes. + + * bool + * 'bool' + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + * complex + * 'complex64' + * 'complex128' + * str + * 'str' + * 'U' + * bytes + * 'bytes' + * 'datetime64[ns]' + * 'M8[ns]' + * 'timedelta64[ns]' + * 'm8[ns]' + * object + * 'object' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_REAL_NULLABLE_DTYPES) +def any_real_nullable_dtype(request): + """ + Parameterized fixture for all real dtypes that can hold NA. + + * float + * 'float32' + * 'float64' + * 'Float32' + * 'Float64' + * 'UInt8' + * 'UInt16' + * 'UInt32' + * 'UInt64' + * 'Int8' + * 'Int16' + * 'Int32' + * 'Int64' + * 'uint8[pyarrow]' + * 'uint16[pyarrow]' + * 'uint32[pyarrow]' + * 'uint64[pyarrow]' + * 'int8[pyarrow]' + * 'int16[pyarrow]' + * 'int32[pyarrow]' + * 'int64[pyarrow]' + * 'float[pyarrow]' + * 'double[pyarrow]' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_NUMERIC_DTYPES) +def any_numeric_dtype(request): + """ + Parameterized fixture for all numeric dtypes. 
+
+    * int
+    * 'int8'
+    * 'uint8'
+    * 'int16'
+    * 'uint16'
+    * 'int32'
+    * 'uint32'
+    * 'int64'
+    * 'uint64'
+    * float
+    * 'float32'
+    * 'float64'
+    * complex
+    * 'complex64'
+    * 'complex128'
+    * 'UInt8'
+    * 'Int8'
+    * 'UInt16'
+    * 'Int16'
+    * 'UInt32'
+    * 'Int32'
+    * 'UInt64'
+    * 'Int64'
+    * 'Float32'
+    * 'Float64'
+    """
+    return request.param
+
+
+# categoricals are handled separately
+_any_skipna_inferred_dtype = [
+    ("string", ["a", np.nan, "c"]),
+    ("string", ["a", pd.NA, "c"]),
+    ("mixed", ["a", pd.NaT, "c"]),  # pd.NaT not considered valid by is_string_array
+    ("bytes", [b"a", np.nan, b"c"]),
+    ("empty", [np.nan, np.nan, np.nan]),
+    ("empty", []),
+    ("mixed-integer", ["a", np.nan, 2]),
+    ("mixed", ["a", np.nan, 2.0]),
+    ("floating", [1.0, np.nan, 2.0]),
+    ("integer", [1, np.nan, 2]),
+    ("mixed-integer-float", [1, np.nan, 2.0]),
+    ("decimal", [Decimal(1), np.nan, Decimal(2)]),
+    ("boolean", [True, np.nan, False]),
+    ("boolean", [True, pd.NA, False]),
+    ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
+    ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
+    ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
+    ("complex", [1 + 1j, np.nan, 2 + 2j]),
+    # The following dtype is commented out due to GH 23554
+    # ('timedelta64', [np.timedelta64(1, 'D'),
+    #                  np.nan, np.timedelta64(2, 'D')]),
+    ("timedelta", [timedelta(1), np.nan, timedelta(2)]),
+    ("time", [time(1), np.nan, time(2)]),
+    ("period", [Period(2013), pd.NaT, Period(2018)]),
+    ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
+]
+ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id
+
+
+@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
+def any_skipna_inferred_dtype(request):
+    """
+    Fixture for all inferred dtypes from _libs.lib.infer_dtype
+
+    The covered (inferred) types are:
+    * 'string'
+    * 'empty'
+    * 'bytes'
+    * 'mixed'
+    * 'mixed-integer'
+    * 'mixed-integer-float'
+    * 'floating'
+    * 'integer'
+    * 'decimal'
+    * 'boolean'
+    * 'datetime64'
+    * 'datetime'
+    * 'date'
+    * 'timedelta'
+    * 'time'
+    * 'period'
+    * 'interval'
+
+    Returns
+    -------
+    inferred_dtype : str
+        The string for the inferred dtype from _libs.lib.infer_dtype
+    values : np.ndarray
+        An array of object dtype that will be inferred to have
+        `inferred_dtype`
+
+    Examples
+    --------
+    >>> from pandas._libs import lib
+    >>>
+    >>> def test_something(any_skipna_inferred_dtype):
+    ...     inferred_dtype, values = any_skipna_inferred_dtype
+    ...     # will pass
+    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
+    """
+    inferred_dtype, values = request.param
+    values = np.array(values, dtype=object)  # object dtype to avoid casting
+
+    # correctness of inference tested in tests/dtypes/test_inference.py
+    return inferred_dtype, values
+
+
+# ----------------------------------------------------------------
+# Misc
+# ----------------------------------------------------------------
+@pytest.fixture
+def ip():
+    """
+    Get an instance of IPython.InteractiveShell.
+
+    Will raise a skip if IPython is not installed.
+    """
+    pytest.importorskip("IPython", minversion="6.0.0")
+    from IPython.core.interactiveshell import InteractiveShell
+
+    # GH#35711 make sure sqlite history file handle is not leaked
+    from traitlets.config import Config  # isort:skip
+
+    c = Config()
+    c.HistoryManager.hist_file = ":memory:"
+
+    return InteractiveShell(config=c)
+
+
+@pytest.fixture
+def mpl_cleanup():
+    """
+    Ensure Matplotlib is cleaned up around a test.
+
+    Before a test is run:
+
+    1) Set the backend to "template" to avoid requiring a GUI.
+
+    After a test is run:
+
+    1) Reset units registry
+    2) Reset rc_context
+    3) Close all figures
+
+    See matplotlib/testing/decorators.py#L24.
+    """
+    mpl = pytest.importorskip("matplotlib")
+    mpl_units = pytest.importorskip("matplotlib.units")
+    plt = pytest.importorskip("matplotlib.pyplot")
+    orig_units_registry = mpl_units.registry.copy()
+    try:
+        with mpl.rc_context():
+            mpl.use("template")
+            yield
+    finally:
+        mpl_units.registry.clear()
+        mpl_units.registry.update(orig_units_registry)
+        plt.close("all")
+        # https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close  # noqa: E501
+        gc.collect(1)
+
+
+@pytest.fixture(
+    params=[
+        getattr(pd.offsets, o)
+        for o in pd.offsets.__all__
+        if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) and o != "Tick"
+    ]
+)
+def tick_classes(request):
+    """
+    Fixture for Tick based datetime offsets available for a time series.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[None, lambda x: x])
+def sort_by_key(request):
+    """
+    Simple fixture for testing keys in sorting methods.
+    Tests None (no key) and the identity key.
+    """
+    return request.param
+
+
+@pytest.fixture(
+    params=[
+        ("foo", None, None),
+        ("Egon", "Venkman", None),
+        ("NCC1701D", "NCC1701D", "NCC1701D"),
+        # possibly-matching NAs
+        (np.nan, np.nan, np.nan),
+        (np.nan, pd.NaT, None),
+        (np.nan, pd.NA, None),
+        (pd.NA, pd.NA, pd.NA),
+    ]
+)
+def names(request) -> tuple[Hashable, Hashable, Hashable]:
+    """
+    A 3-tuple of names, the first two for operands, the last for a result.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc])
+def indexer_sli(request):
+    """
+    Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__
+    """
+    return request.param
+
+
+@pytest.fixture(params=[tm.loc, tm.iloc])
+def indexer_li(request):
+    """
+    Parametrize over loc.__getitem__, iloc.__getitem__
+    """
+    return request.param
+
+
+@pytest.fixture(params=[tm.setitem, tm.iloc])
+def indexer_si(request):
+    """
+    Parametrize over __setitem__, iloc.__setitem__
+    """
+    return request.param
+
+
+@pytest.fixture(params=[tm.setitem, tm.loc])
+def indexer_sl(request):
+    """
+    Parametrize over __setitem__, loc.__setitem__
+    """
+    return request.param
+
+
+@pytest.fixture(params=[tm.at, tm.loc])
+def indexer_al(request):
+    """
+    Parametrize over at.__setitem__, loc.__setitem__
+    """
+    return request.param
+
+
+@pytest.fixture(params=[tm.iat, tm.iloc])
+def indexer_ial(request):
+    """
+    Parametrize over iat.__setitem__, iloc.__setitem__
+    """
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def performance_warning(request) -> Iterator[bool | type[Warning]]:
+    """
+    Fixture to check if performance warnings are enabled. Either produces
+    ``PerformanceWarning`` if they are enabled, otherwise ``False``.
+    """
+    with pd.option_context("mode.performance_warnings", request.param):
+        yield pd.errors.PerformanceWarning if request.param else False
+
+
+@pytest.fixture
+def using_infer_string() -> bool:
+    """
+    Fixture to check if infer string option is enabled.
+    """
+    return pd.options.future.infer_string is True
+
+
+warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
+if zoneinfo is not None:
+    warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw"))  # type: ignore[arg-type]
+
+
+@pytest.fixture(params=warsaws)
+def warsaw(request) -> str:
+    """
+    tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo.
+    """
+    return request.param
+
+
+@pytest.fixture
+def arrow_string_storage():
+    """
+    Fixture that lists possible PyArrow values for StringDtype storage field.
+    """
+    return ("pyarrow", "pyarrow_numpy")
+
+
+@pytest.fixture
+def temp_file(tmp_path):
+    """
+    Generate a unique file for testing use. See link for removal policy.
+    https://docs.pytest.org/en/7.1.x/how-to/tmp_path.html#the-default-base-temporary-directory
+    """
+    file_path = tmp_path / str(uuid.uuid4())
+    file_path.touch()
+    return file_path
diff --git a/pandas/tests/pytest.ini b/pandas/tests/pytest.ini
new file mode 100644
index 0000000000000..182e55b63fcd9
--- /dev/null
+++ b/pandas/tests/pytest.ini
@@ -0,0 +1,42 @@
+[pytest]
+# sync minversion with pyproject.toml & install.rst
+minversion = 7.3.2
+addopts = --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml
+empty_parameter_set_mark = fail_at_collect
+xfail_strict = true
+testpaths = .
+doctest_optionflags =
+  NORMALIZE_WHITESPACE
+  IGNORE_EXCEPTION_DETAIL
+  ELLIPSIS
+
+filterwarnings =
+  error:::pandas
+  error::ResourceWarning
+  error::pytest.PytestUnraisableExceptionWarning
+  # TODO(PY311-minimum): Specify EncodingWarning
+  # Ignore 3rd party EncodingWarning but raise on pandas'
+  ignore:.*encoding.* argument not specified
+  error:.*encoding.* argument not specified::pandas
+  ignore:.*ssl.SSLSocket:pytest.PytestUnraisableExceptionWarning
+  ignore:.*ssl.SSLSocket:ResourceWarning
+  # GH 44844: Can remove once minimum matplotlib version >= 3.7
+  ignore:.*FileIO:pytest.PytestUnraisableExceptionWarning
+  ignore:.*BufferedRandom:ResourceWarning
+  ignore::ResourceWarning:asyncio
+  # From plotting doctests
+  ignore:More than 20 figures have been opened:RuntimeWarning
+  ignore:.*urllib3:DeprecationWarning:botocore
+  ignore:Setuptools is replacing distutils.:UserWarning:_distutils_hack
+  # https://github.com/PyTables/PyTables/issues/822
+  ignore:a closed node found in the registry:UserWarning:tables
+
+junit_family = xunit2
+markers =
+  single_cpu: tests that should run on a single cpu only
+  slow: mark a test as slow
+  network: mark a test as network
+  db: tests requiring a database (mysql or postgres)
+  clipboard: mark a pd.read_clipboard test
+  arm_slow: mark a test as slow for arm64 architecture
+  skip_ubsan: Tests known to fail UBSAN check
diff --git a/pandas/util/_exceptions.py b/pandas/util/_exceptions.py
index 5f50838d37315..056f73c6b4f08 100644
--- a/pandas/util/_exceptions.py
+++ b/pandas/util/_exceptions.py
@@ -42,6 +42,16 @@ def find_stack_level() -> int:
     pkg_dir = os.path.dirname(pd.__file__)
     test_dir = os.path.join(pkg_dir, "tests")
 
+    # pandas_tests may sit beside pandas, so its paths can also start with pkg_dir
+    try:
+        # Can't use import_optional_dependency here: it uses find_stack_level,
+        # so importing it from this module would be a circular import.
+        import pandas_tests  # pyright: ignore [reportMissingImports]
+
+        pd_tests_dir = os.path.dirname(pandas_tests.__file__)
+    except ImportError:
+        pd_tests_dir = None
+
     # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
     frame: FrameType | None = inspect.currentframe()
     try:
@@ -49,6 +59,8 @@ def find_stack_level() -> int:
         while frame:
             filename = inspect.getfile(frame)
             if filename.startswith(pkg_dir) and not filename.startswith(test_dir):
+                if pd_tests_dir is not None and filename.startswith(pd_tests_dir):
+                    break
                 frame = frame.f_back
                 n += 1
             else:
diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py
index 494f306ec807d..ade2955a772f5 100644
--- a/pandas/util/_tester.py
+++ b/pandas/util/_tester.py
@@ -6,11 +6,10 @@
 
 import os
 import sys
+from typing import cast
 
 from pandas.compat._optional import import_optional_dependency
 
-PKG = os.path.dirname(os.path.dirname(__file__))
-
 
 def test(extra_args: list[str] | None = None, run_doctests: bool = False) -> None:
     """
@@ -39,12 +38,17 @@ def test(extra_args: list[str] | None = None, run_doctests: bool = False) -> Non
         if not isinstance(extra_args, list):
             extra_args = [extra_args]
         cmd = extra_args
+    # Don't require pandas_tests if only running doctests
     if run_doctests:
+        PKG = os.path.dirname(os.path.dirname(__file__))
         cmd = [
             "--doctest-modules",
             "--doctest-cython",
             f"--ignore={os.path.join(PKG, 'tests')}",
         ]
+    else:
+        pandas_tests = import_optional_dependency("pandas_tests")
+        PKG = os.path.dirname(cast(str, pandas_tests.__file__))
     cmd += [PKG]
     joined = " ".join(cmd)
     print(f"running: pytest {joined}")
diff --git a/pyproject.toml b/pyproject.toml
index 5f5b013ca8461..ecd494523000c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -165,7 +165,7 @@ repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}"
 
 [[tool.cibuildwheel.overrides]]
 select = "*-musllinux*"
-before-test = "apk update && apk add musl-locales"
+before-all = "apk update && apk add musl-locales"
 
 [[tool.cibuildwheel.overrides]]
 select = "*-win*"
@@ -468,50 +468,6 @@ disable = [
   "using-constant-test"
 ]
 
-[tool.pytest.ini_options]
-# sync minversion with pyproject.toml & install.rst
-minversion = "7.3.2"
-addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml"
-empty_parameter_set_mark = "fail_at_collect"
-xfail_strict = true
-testpaths = "pandas"
-doctest_optionflags = [
-  "NORMALIZE_WHITESPACE",
-  "IGNORE_EXCEPTION_DETAIL",
-  "ELLIPSIS",
-]
-filterwarnings = [
-  "error:::pandas",
-  "error::ResourceWarning",
-  "error::pytest.PytestUnraisableExceptionWarning",
-  # TODO(PY311-minimum): Specify EncodingWarning
-  # Ignore 3rd party EncodingWarning but raise on pandas'
-  "ignore:.*encoding.* argument not specified",
-  "error:.*encoding.* argument not specified::pandas",
-  "ignore:.*ssl.SSLSocket:pytest.PytestUnraisableExceptionWarning",
-  "ignore:.*ssl.SSLSocket:ResourceWarning",
-  # GH 44844: Can remove once minimum matplotlib version >= 3.7
-  "ignore:.*FileIO:pytest.PytestUnraisableExceptionWarning",
-  "ignore:.*BufferedRandom:ResourceWarning",
-  "ignore::ResourceWarning:asyncio",
-  # From plotting doctests
-  "ignore:More than 20 figures have been opened:RuntimeWarning",
-  "ignore:.*urllib3:DeprecationWarning:botocore",
-  "ignore:Setuptools is replacing distutils.:UserWarning:_distutils_hack",
-  # https://github.com/PyTables/PyTables/issues/822
-  "ignore:a closed node found in the registry:UserWarning:tables",
-]
-junit_family = "xunit2"
-markers = [
-  "single_cpu: tests that should run on a single cpu only",
-  "slow: mark a test as slow",
-  "network: mark a test as network",
-  "db: tests requiring a database (mysql or postgres)",
-  "clipboard: mark a pd.read_clipboard test",
-  "arm_slow: mark a test as slow for arm64 architecture",
-  "skip_ubsan: Tests known to fail UBSAN check",
-]
-
 [tool.mypy]
 # Import discovery
 mypy_path = "typings"