From 391e42dfecc39cd79f3370f51ea9382db144b7d2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 4 Mar 2021 13:24:35 +0100 Subject: [PATCH 1/4] TST: use env variable instead of pytest option for testing ArrayManager --- .github/workflows/ci.yml | 34 +++++++++++++++++---------------- pandas/core/config_init.py | 8 +++++++- pandas/util/_test_decorators.py | 11 ++++------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c03722e32fea9..06a06484b921a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -150,23 +150,25 @@ jobs: uses: ./.github/actions/setup - name: Run tests + env: + PANDAS_DATA_MANAGER: array run: | source activate pandas-dev - pytest pandas/tests/frame/methods --array-manager - pytest pandas/tests/frame/test_constructors.py --array-manager - pytest pandas/tests/frame/constructors/ --array-manager - pytest pandas/tests/frame/test_reductions.py --array-manager - pytest pandas/tests/reductions/ --array-manager - pytest pandas/tests/generic/test_generic.py --array-manager - pytest pandas/tests/arithmetic/ --array-manager - pytest pandas/tests/groupby/ --array-manager - pytest pandas/tests/resample/ --array-manager - pytest pandas/tests/reshape/merge --array-manager + pytest pandas/tests/frame/methods + pytest pandas/tests/frame/test_constructors.py + pytest pandas/tests/frame/constructors/ + pytest pandas/tests/frame/test_reductions.py + pytest pandas/tests/reductions/ + pytest pandas/tests/generic/test_generic.py + pytest pandas/tests/arithmetic/ + pytest pandas/tests/groupby/ + pytest pandas/tests/resample/ + pytest pandas/tests/reshape/merge # indexing subset (temporary since other tests don't pass yet) - pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager - pytest pandas/tests/frame/indexing/test_where.py --array-manager - pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index --array-manager - pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns --array-manager - pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups --array-manager - pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column --array-manager + pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean + pytest pandas/tests/frame/indexing/test_where.py + pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index + pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns + pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups + pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 56ef1ea28ed1b..9e70d3487bbe0 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -9,6 +9,7 @@ module is imported, register them here rather than in the module. """ +import os import warnings import pandas._config.config as cf @@ -478,6 +479,11 @@ def use_inf_as_na_cb(key): _use_inf_as_na(key) +# Get the default from an environment variable, if set, otherwise defaults to "block" +# This environment variable can be set for testing +_data_manager_default = os.environ.get("PANDAS_DATA_MANAGER", "block") + + with cf.config_prefix("mode"): cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb) cf.register_option( @@ -485,7 +491,7 @@ def use_inf_as_na_cb(key): ) cf.register_option( "data_manager", - "block", + _data_manager_default, "Internal data manager type", validator=is_one_of_factory(["block", "array"]), ) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index fd8f62331dc38..c39647522aaf1 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -35,6 +35,8 @@ def test_foo(): import numpy as np import pytest +from pandas._config import get_option + from pandas.compat import ( IS64, is_platform_windows, @@ -285,16 +287,11 @@ def async_mark(): return async_mark -# Note: we are using a string as condition (and not for example -# `get_option("mode.data_manager") == "array"`) because this needs to be -# evaluated at test time (otherwise this boolean condition gets evaluated -# at import time, when the pd.options.mode.data_manager has not yet been set) - skip_array_manager_not_yet_implemented = pytest.mark.skipif( - "config.getvalue('--array-manager')", reason="JSON C code relies on Blocks" + get_option("mode.data_manager") == "array", reason="JSON C code relies on Blocks" ) skip_array_manager_invalid_test = pytest.mark.skipif( - "config.getvalue('--array-manager')", + get_option("mode.data_manager") == "array", reason="Test that relies on BlockManager internals or specific behaviour", ) From 070a7f97f3d273a88473f96166c1dd84351a12fc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 4 Mar 2021 15:08:19 +0100 Subject: [PATCH 2/4] inline default --- pandas/core/config_init.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 9e70d3487bbe0..b0c8bb100602b 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -479,11 +479,6 @@ def use_inf_as_na_cb(key): _use_inf_as_na(key) -# Get the default from an environment variable, if set, otherwise defaults to "block" -# This environment variable can be set for testing -_data_manager_default = os.environ.get("PANDAS_DATA_MANAGER", "block") - - with cf.config_prefix("mode"): cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb) cf.register_option( @@ -491,7 +486,9 @@ def use_inf_as_na_cb(key): ) cf.register_option( "data_manager", - _data_manager_default, + # Get the default from an environment variable, if set, otherwise defaults + # to "block". This environment variable can be set for testing. + os.environ.get("PANDAS_DATA_MANAGER", "block"), "Internal data manager type", validator=is_one_of_factory(["block", "array"]), ) From f8da5a7f5ad15e247f17eb103911a2a9bba5172b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 4 Mar 2021 15:14:45 +0100 Subject: [PATCH 3/4] better docs --- pandas/core/config_init.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index b0c8bb100602b..fd49ac0176ce4 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -484,19 +484,31 @@ def use_inf_as_na_cb(key): cf.register_option( "use_inf_as_null", False, use_inf_as_null_doc, cb=use_inf_as_na_cb ) + + +cf.deprecate_option( + "mode.use_inf_as_null", msg=use_inf_as_null_doc, rkey="mode.use_inf_as_na" +) + + +data_manager_doc = """ +: string + Internal data manager type; can be "block" or "array". Defaults to "block", + unless overridden by the 'PANDAS_DATA_MANAGER' environment variable (needs + to be set before pandas is imported). +""" + + +with cf.config_prefix("mode"): cf.register_option( "data_manager", # Get the default from an environment variable, if set, otherwise defaults # to "block". This environment variable can be set for testing. os.environ.get("PANDAS_DATA_MANAGER", "block"), - "Internal data manager type", + data_manager_doc, validator=is_one_of_factory(["block", "array"]), ) -cf.deprecate_option( - "mode.use_inf_as_null", msg=use_inf_as_null_doc, rkey="mode.use_inf_as_na" -) - # user warnings chained_assignment = """ From 3f19b0f58a8f64add9acd059b4fc2e03dbdcd000 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 4 Mar 2021 15:16:12 +0100 Subject: [PATCH 4/4] remove pytest option --- pandas/conftest.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 426cbf6a65aa5..b427375c064c8 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -100,19 +100,6 @@ def pytest_addoption(parser): action="store_true", help="Fail if a test is skipped for missing data file.", ) - parser.addoption( - "--array-manager", - "--am", - action="store_true", - help="Use the experimental ArrayManager as default data manager.", - ) - - -def pytest_sessionstart(session): - # Note: we need to set the option here and not in pytest_runtest_setup below - # to ensure this is run before creating fixture data - if session.config.getoption("--array-manager"): - pd.options.mode.data_manager = "array" def pytest_runtest_setup(item):