diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 6ea4c213e85c8..8ad67ab2df410 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -29,11 +29,6 @@ detect this value with data of different types: floating point, integer, boolean, and general object. In many cases, however, the Python ``None`` will arise and we wish to also consider that "missing" or "not available" or "NA". -.. note:: - - If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, - you can set ``pandas.options.mode.use_inf_as_na = True``. - .. _missing.isna: .. ipython:: python diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 92124a536fe26..d6a4e20174011 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -274,6 +274,7 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) +- Deprecated option ``mode.use_inf_as_na``; convert ``inf`` entries to ``NaN`` beforehand instead (:issue:`51684`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) ..
--------------------------------------------------------------------------- diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 920a33d39e1d1..33e5ac8d2a26c 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -438,7 +438,7 @@ def __init__(self, *args) -> None: self.ops = list(zip(args[::2], args[1::2])) def __enter__(self) -> None: - self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops] + self.undo = [(pat, _get_option(pat)) for pat, val in self.ops] for pat, val in self.ops: _set_option(pat, val, silent=True) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5f1aa3a1e9535..3f662073f0357 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -411,6 +411,8 @@ def is_terminal() -> bool: True means treat None, NaN, INF, -INF as NA (old way), False means None and NaN are null, but INF, -INF are not NA (new way). + + This option is deprecated in pandas 2.1.0 and will be removed in 3.0. """ # We don't want to start importing everything at the global context level @@ -426,6 +428,12 @@ def use_inf_as_na_cb(key) -> None: with cf.config_prefix("mode"): cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb) +cf.deprecate_option( + # GH#51684 + "mode.use_inf_as_na", + "use_inf_as_na option is deprecated and will be removed in a future " + "version. Convert inf values to NaN before operating instead.", +) data_manager_doc = """ : string diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d4c2124182ea5..044466f348e08 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10740,8 +10740,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False): """ Count non-NA cells for each column or row. - The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending - on `pandas.options.mode.use_inf_as_na`) are considered NA. + The values `None`, `NaN`, `NaT`, and ``pandas.NA`` are considered NA.
Parameters ---------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 68e9006e85f7a..09288448cec57 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2820,8 +2820,7 @@ def isna(self) -> npt.NDArray[np.bool_]: NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get mapped to ``True`` values. Everything else get mapped to ``False`` values. Characters such as - empty strings `''` or :attr:`numpy.inf` are not considered NA values - (unless you set ``pandas.options.mode.use_inf_as_na = True``). + empty strings `''` or :attr:`numpy.inf` are not considered NA values. Returns ------- @@ -2876,8 +2875,7 @@ def notna(self) -> npt.NDArray[np.bool_]: Return a boolean same-sized object indicating if the values are not NA. Non-missing values get mapped to ``True``. Characters such as empty - strings ``''`` or :attr:`numpy.inf` are not considered NA values - (unless you set ``pandas.options.mode.use_inf_as_na = True``). + strings ``''`` or :attr:`numpy.inf` are not considered NA values. NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False`` values. 
diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index d7830248cb73c..892795b89c1f5 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -137,18 +137,20 @@ def test_fillna_array(self): ) def test_use_inf_as_na(self, values, expected): # https://github.com/pandas-dev/pandas/issues/33594 - with pd.option_context("mode.use_inf_as_na", True): - cat = Categorical(values) - result = cat.isna() - tm.assert_numpy_array_equal(result, expected) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.use_inf_as_na", True): + cat = Categorical(values) + result = cat.isna() + tm.assert_numpy_array_equal(result, expected) - result = Series(cat).isna() - expected = Series(expected) - tm.assert_series_equal(result, expected) + result = Series(cat).isna() + expected = Series(expected) + tm.assert_series_equal(result, expected) - result = DataFrame(cat).isna() - expected = DataFrame(expected) - tm.assert_frame_equal(result, expected) + result = DataFrame(cat).isna() + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "values, expected", @@ -164,17 +166,19 @@ def test_use_inf_as_na_outside_context(self, values, expected): # Using isna directly for Categorical will fail in general here cat = Categorical(values) - with pd.option_context("mode.use_inf_as_na", True): - result = isna(cat) - tm.assert_numpy_array_equal(result, expected) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.use_inf_as_na", True): + result = isna(cat) + tm.assert_numpy_array_equal(result, expected) - result = isna(Series(cat)) - expected = Series(expected) - tm.assert_series_equal(result, expected) + result = isna(Series(cat)) + expected = Series(expected) + 
tm.assert_series_equal(result, expected) - result = isna(DataFrame(cat)) - expected = DataFrame(expected) - tm.assert_frame_equal(result, expected) + result = isna(DataFrame(cat)) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "a1, a2, categories", diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 7e4869589cee6..5ca95bd00f136 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -492,17 +492,19 @@ def test_value_counts_with_normalize(dtype): def test_use_inf_as_na(values, expected, dtype): # https://github.com/pandas-dev/pandas/issues/33655 values = pd.array(values, dtype=dtype) - with pd.option_context("mode.use_inf_as_na", True): - result = values.isna() - tm.assert_numpy_array_equal(result, expected) - - result = pd.Series(values).isna() - expected = pd.Series(expected) - tm.assert_series_equal(result, expected) - - result = pd.DataFrame(values).isna() - expected = pd.DataFrame(expected) - tm.assert_frame_equal(result, expected) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.use_inf_as_na", True): + result = values.isna() + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(values).isna() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = pd.DataFrame(values).isna() + expected = pd.DataFrame(expected) + tm.assert_frame_equal(result, expected) def test_memory_usage(dtype): diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 0158e7589b214..bd8dffd2abe1f 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -53,21 +53,24 @@ def test_notna_notnull(notna_f): assert not notna_f(None) assert not notna_f(np.NaN) - with cf.option_context("mode.use_inf_as_na", False): - assert 
notna_f(np.inf) - assert notna_f(-np.inf) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with cf.option_context("mode.use_inf_as_na", False): + assert notna_f(np.inf) + assert notna_f(-np.inf) - arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notna_f(arr) - assert result.all() + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.all() - with cf.option_context("mode.use_inf_as_na", True): - assert not notna_f(np.inf) - assert not notna_f(-np.inf) + with tm.assert_produces_warning(FutureWarning, match=msg): + with cf.option_context("mode.use_inf_as_na", True): + assert not notna_f(np.inf) + assert not notna_f(-np.inf) - arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notna_f(arr) - assert result.sum() == 2 + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.sum() == 2 @pytest.mark.parametrize("null_func", [notna, notnull, isna, isnull]) @@ -82,8 +85,10 @@ def test_notna_notnull(notna_f): ], ) def test_null_check_is_series(null_func, ser): - with cf.option_context("mode.use_inf_as_na", False): - assert isinstance(null_func(ser), Series) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with cf.option_context("mode.use_inf_as_na", False): + assert isinstance(null_func(ser), Series) class TestIsNA: @@ -214,8 +219,10 @@ def test_isna_old_datetimelike(self): objs = [dta, dta.tz_localize("US/Eastern"), dta - dta, dta.to_period("D")] for obj in objs: - with cf.option_context("mode.use_inf_as_na", True): - result = isna(obj) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with cf.option_context("mode.use_inf_as_na", True): + result = isna(obj) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 8a53c06e0b7bf..c0ea03d057637 
100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -154,6 +154,8 @@ def test_fillna_fill_other(self, data): def test_use_inf_as_na_no_effect(self, data_missing): ser = pd.Series(data_missing) expected = ser.isna() - with pd.option_context("mode.use_inf_as_na", True): - result = ser.isna() + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.use_inf_as_na", True): + result = ser.isna() self.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index f3b77c27d75bd..6f21bd4c4b438 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -96,10 +96,12 @@ def test_dtypes_gh8722(self, float_string_frame): tm.assert_series_equal(result, expected) # compat, GH 8722 - with option_context("use_inf_as_na", True): - df = DataFrame([[1]]) - result = df.dtypes - tm.assert_series_equal(result, Series({0: np.dtype("int64")})) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with option_context("use_inf_as_na", True): + df = DataFrame([[1]]) + result = df.dtypes + tm.assert_series_equal(result, Series({0: np.dtype("int64")})) def test_dtypes_timedeltas(self): df = DataFrame( diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index d7de369703ae9..38977470f455c 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -769,8 +769,10 @@ def test_sort_index_use_inf_as_na(self): {"col1": [1, 2, 3], "col2": [3, 4, 5]}, index=pd.date_range("2020", periods=3), ) - with pd.option_context("mode.use_inf_as_na", True): - result = expected.sort_index() + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with 
pd.option_context("mode.use_inf_as_na", True): + result = expected.sort_index() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index b42af8cdfcd8c..42e018c781111 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -400,30 +400,36 @@ def test_to_records_with_na_record(self): def test_to_records_with_inf_as_na_record(self): # GH 48526 - with option_context("use_inf_as_na", True): - df = DataFrame( - [[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf] - ) - df["record"] = df[[np.nan, np.inf]].to_records() - expected = """ NaN inf record + expected = """ NaN inf record 0 NaN b [0, inf, b] 1 NaN NaN [1, nan, nan] 2 e f [2, e, f]""" - result = repr(df) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with option_context("use_inf_as_na", True): + df = DataFrame( + [[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], + columns=[np.nan, np.inf], + ) + df["record"] = df[[np.nan, np.inf]].to_records() + result = repr(df) assert result == expected def test_to_records_with_inf_record(self): # GH 48526 - with option_context("use_inf_as_na", False): - df = DataFrame( - [[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf] - ) - df["record"] = df[[np.nan, np.inf]].to_records() - expected = """ NaN inf record + expected = """ NaN inf record 0 inf b [0, inf, b] 1 NaN NaN [1, nan, nan] 2 e f [2, e, f]""" - result = repr(df) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with option_context("use_inf_as_na", False): + df = DataFrame( + [[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], + columns=[np.nan, np.inf], + ) + df["record"] = df[[np.nan, np.inf]].to_records() + result = repr(df) assert result == expected def test_masked_ea_with_formatter(self): diff --git 
a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py index d43fb2f5187e1..488b9d75affe1 100644 --- a/pandas/tests/io/parser/common/test_inf.py +++ b/pandas/tests/io/parser/common/test_inf.py @@ -62,7 +62,9 @@ def test_read_csv_with_use_inf_as_na(all_parsers): # https://github.com/pandas-dev/pandas/issues/35493 parser = all_parsers data = "1.0\nNaN\n3.0" - with option_context("use_inf_as_na", True): - result = parser.read_csv(StringIO(data), header=None) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with option_context("use_inf_as_na", True): + result = parser.read_csv(StringIO(data), header=None) expected = DataFrame([1.0, np.nan, 3.0]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index b02f8a19c77cb..4ea3c75cb684a 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -562,8 +562,10 @@ def test_sum_inf(self): arr = np.random.randn(100, 100).astype("f4") arr[:, 2] = np.inf - with pd.option_context("mode.use_inf_as_na", True): - tm.assert_almost_equal(s.sum(), s2.sum()) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.use_inf_as_na", True): + tm.assert_almost_equal(s.sum(), s2.sum()) res = nanops.nansum(arr, axis=1) assert np.isinf(res).all() @@ -1102,13 +1104,15 @@ def test_idxminmax_with_inf(self): assert s.idxmax() == 2 assert np.isnan(s.idxmax(skipna=False)) - # Using old-style behavior that treats floating point nan, -inf, and - # +inf as missing - with pd.option_context("mode.use_inf_as_na", True): - assert s.idxmin() == 0 - assert np.isnan(s.idxmin(skipna=False)) - assert s.idxmax() == 0 - np.isnan(s.idxmax(skipna=False)) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + # 
Using old-style behavior that treats floating point nan, -inf, and + # +inf as missing + with pd.option_context("mode.use_inf_as_na", True): + assert s.idxmin() == 0 + assert np.isnan(s.idxmin(skipna=False)) + assert s.idxmax() == 0 + np.isnan(s.idxmax(skipna=False)) def test_sum_uint64(self): # GH 53401 diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py index 16a8b06e1a170..90984a2e65cba 100644 --- a/pandas/tests/series/methods/test_count.py +++ b/pandas/tests/series/methods/test_count.py @@ -5,6 +5,7 @@ Categorical, Series, ) +import pandas._testing as tm class TestSeriesCount: @@ -15,9 +16,13 @@ def test_count(self, datetime_series): assert datetime_series.count() == np.isfinite(datetime_series).sum() + def test_count_inf_as_na(self): # GH#29478 - with pd.option_context("use_inf_as_na", True): - assert Series([pd.Timestamp("1990/1/1")]).count() == 1 + ser = Series([pd.Timestamp("1990/1/1")]) + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("use_inf_as_na", True): + assert ser.count() == 1 def test_count_categorical(self): ser = Series( diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 6cf67e22b6f19..3f0078d3c1487 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -27,9 +27,11 @@ def test_categorical_nan_handling(self): def test_isna_for_inf(self): s = Series(["a", np.inf, np.nan, pd.NA, 1.0]) - with pd.option_context("mode.use_inf_as_na", True): - r = s.isna() - dr = s.dropna() + msg = "use_inf_as_na option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.use_inf_as_na", True): + r = s.isna() + dr = s.dropna() e = Series([False, True, True, True, False]) de = Series(["a", 1.0], index=[0, 4]) tm.assert_series_equal(r, e)