Skip to content

DEPR: use_inf_as_na #53494

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ detect this value with data of different types: floating point, integer,
boolean, and general object. In many cases, however, the Python ``None`` will
arise and we wish to also consider that "missing" or "not available" or "NA".

.. note::

If you want to consider ``inf`` and ``-inf`` to be "NA" in computations,
you can set ``pandas.options.mode.use_inf_as_na = True``.

.. _missing.isna:

.. ipython:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ Deprecations
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
- Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` beforehand instead (:issue:`51684`)
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def __init__(self, *args) -> None:
self.ops = list(zip(args[::2], args[1::2]))

def __enter__(self) -> None:
self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops]
self.undo = [(pat, _get_option(pat)) for pat, val in self.ops]

for pat, val in self.ops:
_set_option(pat, val, silent=True)
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,8 @@ def is_terminal() -> bool:
True means treat None, NaN, INF, -INF as NA (old way),
False means None and NaN are null, but INF, -INF are not NA
(new way).

This option is deprecated in pandas 2.1.0 and will be removed in 3.0.
"""

# We don't want to start importing everything at the global context level
Expand All @@ -426,6 +428,12 @@ def use_inf_as_na_cb(key) -> None:
with cf.config_prefix("mode"):
cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)

cf.deprecate_option(
# GH#51684
"mode.use_inf_as_na",
"use_inf_as_na option is deprecated and will be removed in a future "
"version. Convert inf values to NaN before operating instead.",
)

data_manager_doc = """
: string
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10740,8 +10740,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
"""
Count non-NA cells for each column or row.

The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending
on `pandas.options.mode.use_inf_as_na`) are considered NA.
The values `None`, `NaN`, `NaT`, and ``pandas.NA`` are considered NA.

Parameters
----------
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2820,8 +2820,7 @@ def isna(self) -> npt.NDArray[np.bool_]:
NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
mapped to ``True`` values.
Everything else gets mapped to ``False`` values. Characters such as
empty strings `''` or :attr:`numpy.inf` are not considered NA values
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
empty strings `''` or :attr:`numpy.inf` are not considered NA values.

Returns
-------
Expand Down Expand Up @@ -2876,8 +2875,7 @@ def notna(self) -> npt.NDArray[np.bool_]:

Return a boolean same-sized object indicating if the values are not NA.
Non-missing values get mapped to ``True``. Characters such as empty
strings ``''`` or :attr:`numpy.inf` are not considered NA values
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
strings ``''`` or :attr:`numpy.inf` are not considered NA values.
NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
values.

Expand Down
42 changes: 23 additions & 19 deletions pandas/tests/arrays/categorical/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,20 @@ def test_fillna_array(self):
)
def test_use_inf_as_na(self, values, expected):
# https://github.com/pandas-dev/pandas/issues/33594
with pd.option_context("mode.use_inf_as_na", True):
cat = Categorical(values)
result = cat.isna()
tm.assert_numpy_array_equal(result, expected)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.use_inf_as_na", True):
cat = Categorical(values)
result = cat.isna()
tm.assert_numpy_array_equal(result, expected)

result = Series(cat).isna()
expected = Series(expected)
tm.assert_series_equal(result, expected)
result = Series(cat).isna()
expected = Series(expected)
tm.assert_series_equal(result, expected)

result = DataFrame(cat).isna()
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)
result = DataFrame(cat).isna()
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"values, expected",
Expand All @@ -164,17 +166,19 @@ def test_use_inf_as_na_outside_context(self, values, expected):
# Using isna directly for Categorical will fail in general here
cat = Categorical(values)

with pd.option_context("mode.use_inf_as_na", True):
result = isna(cat)
tm.assert_numpy_array_equal(result, expected)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.use_inf_as_na", True):
result = isna(cat)
tm.assert_numpy_array_equal(result, expected)

result = isna(Series(cat))
expected = Series(expected)
tm.assert_series_equal(result, expected)
result = isna(Series(cat))
expected = Series(expected)
tm.assert_series_equal(result, expected)

result = isna(DataFrame(cat))
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)
result = isna(DataFrame(cat))
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"a1, a2, categories",
Expand Down
24 changes: 13 additions & 11 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,17 +492,19 @@ def test_value_counts_with_normalize(dtype):
def test_use_inf_as_na(values, expected, dtype):
# https://github.com/pandas-dev/pandas/issues/33655
values = pd.array(values, dtype=dtype)
with pd.option_context("mode.use_inf_as_na", True):
result = values.isna()
tm.assert_numpy_array_equal(result, expected)

result = pd.Series(values).isna()
expected = pd.Series(expected)
tm.assert_series_equal(result, expected)

result = pd.DataFrame(values).isna()
expected = pd.DataFrame(expected)
tm.assert_frame_equal(result, expected)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.use_inf_as_na", True):
result = values.isna()
tm.assert_numpy_array_equal(result, expected)

result = pd.Series(values).isna()
expected = pd.Series(expected)
tm.assert_series_equal(result, expected)

result = pd.DataFrame(values).isna()
expected = pd.DataFrame(expected)
tm.assert_frame_equal(result, expected)


def test_memory_usage(dtype):
Expand Down
39 changes: 23 additions & 16 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,24 @@ def test_notna_notnull(notna_f):
assert not notna_f(None)
assert not notna_f(np.NaN)

with cf.option_context("mode.use_inf_as_na", False):
assert notna_f(np.inf)
assert notna_f(-np.inf)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with cf.option_context("mode.use_inf_as_na", False):
assert notna_f(np.inf)
assert notna_f(-np.inf)

arr = np.array([1.5, np.inf, 3.5, -np.inf])
result = notna_f(arr)
assert result.all()
arr = np.array([1.5, np.inf, 3.5, -np.inf])
result = notna_f(arr)
assert result.all()

with cf.option_context("mode.use_inf_as_na", True):
assert not notna_f(np.inf)
assert not notna_f(-np.inf)
with tm.assert_produces_warning(FutureWarning, match=msg):
with cf.option_context("mode.use_inf_as_na", True):
assert not notna_f(np.inf)
assert not notna_f(-np.inf)

arr = np.array([1.5, np.inf, 3.5, -np.inf])
result = notna_f(arr)
assert result.sum() == 2
arr = np.array([1.5, np.inf, 3.5, -np.inf])
result = notna_f(arr)
assert result.sum() == 2


@pytest.mark.parametrize("null_func", [notna, notnull, isna, isnull])
Expand All @@ -82,8 +85,10 @@ def test_notna_notnull(notna_f):
],
)
def test_null_check_is_series(null_func, ser):
with cf.option_context("mode.use_inf_as_na", False):
assert isinstance(null_func(ser), Series)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with cf.option_context("mode.use_inf_as_na", False):
assert isinstance(null_func(ser), Series)


class TestIsNA:
Expand Down Expand Up @@ -214,8 +219,10 @@ def test_isna_old_datetimelike(self):
objs = [dta, dta.tz_localize("US/Eastern"), dta - dta, dta.to_period("D")]

for obj in objs:
with cf.option_context("mode.use_inf_as_na", True):
result = isna(obj)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with cf.option_context("mode.use_inf_as_na", True):
result = isna(obj)

tm.assert_numpy_array_equal(result, expected)

Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/extension/base/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ def test_fillna_fill_other(self, data):
def test_use_inf_as_na_no_effect(self, data_missing):
ser = pd.Series(data_missing)
expected = ser.isna()
with pd.option_context("mode.use_inf_as_na", True):
result = ser.isna()
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.use_inf_as_na", True):
result = ser.isna()
self.assert_series_equal(result, expected)
10 changes: 6 additions & 4 deletions pandas/tests/frame/methods/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,12 @@ def test_dtypes_gh8722(self, float_string_frame):
tm.assert_series_equal(result, expected)

# compat, GH 8722
with option_context("use_inf_as_na", True):
df = DataFrame([[1]])
result = df.dtypes
tm.assert_series_equal(result, Series({0: np.dtype("int64")}))
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with option_context("use_inf_as_na", True):
df = DataFrame([[1]])
result = df.dtypes
tm.assert_series_equal(result, Series({0: np.dtype("int64")}))

def test_dtypes_timedeltas(self):
df = DataFrame(
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/frame/methods/test_sort_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,8 +769,10 @@ def test_sort_index_use_inf_as_na(self):
{"col1": [1, 2, 3], "col2": [3, 4, 5]},
index=pd.date_range("2020", periods=3),
)
with pd.option_context("mode.use_inf_as_na", True):
result = expected.sort_index()
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.use_inf_as_na", True):
result = expected.sort_index()
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
Expand Down
34 changes: 20 additions & 14 deletions pandas/tests/frame/test_repr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,30 +400,36 @@ def test_to_records_with_na_record(self):

def test_to_records_with_inf_as_na_record(self):
# GH 48526
with option_context("use_inf_as_na", True):
df = DataFrame(
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf]
)
df["record"] = df[[np.nan, np.inf]].to_records()
expected = """ NaN inf record
expected = """ NaN inf record
0 NaN b [0, inf, b]
1 NaN NaN [1, nan, nan]
2 e f [2, e, f]"""
result = repr(df)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with option_context("use_inf_as_na", True):
df = DataFrame(
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]],
columns=[np.nan, np.inf],
)
df["record"] = df[[np.nan, np.inf]].to_records()
result = repr(df)
assert result == expected

def test_to_records_with_inf_record(self):
# GH 48526
with option_context("use_inf_as_na", False):
df = DataFrame(
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf]
)
df["record"] = df[[np.nan, np.inf]].to_records()
expected = """ NaN inf record
expected = """ NaN inf record
0 inf b [0, inf, b]
1 NaN NaN [1, nan, nan]
2 e f [2, e, f]"""
result = repr(df)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with option_context("use_inf_as_na", False):
df = DataFrame(
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]],
columns=[np.nan, np.inf],
)
df["record"] = df[[np.nan, np.inf]].to_records()
result = repr(df)
assert result == expected

def test_masked_ea_with_formatter(self):
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/io/parser/common/test_inf.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def test_read_csv_with_use_inf_as_na(all_parsers):
# https://github.com/pandas-dev/pandas/issues/35493
parser = all_parsers
data = "1.0\nNaN\n3.0"
with option_context("use_inf_as_na", True):
result = parser.read_csv(StringIO(data), header=None)
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with option_context("use_inf_as_na", True):
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame([1.0, np.nan, 3.0])
tm.assert_frame_equal(result, expected)
22 changes: 13 additions & 9 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,8 +562,10 @@ def test_sum_inf(self):
arr = np.random.randn(100, 100).astype("f4")
arr[:, 2] = np.inf

with pd.option_context("mode.use_inf_as_na", True):
tm.assert_almost_equal(s.sum(), s2.sum())
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.use_inf_as_na", True):
tm.assert_almost_equal(s.sum(), s2.sum())

res = nanops.nansum(arr, axis=1)
assert np.isinf(res).all()
Expand Down Expand Up @@ -1102,13 +1104,15 @@ def test_idxminmax_with_inf(self):
assert s.idxmax() == 2
assert np.isnan(s.idxmax(skipna=False))

# Using old-style behavior that treats floating point nan, -inf, and
# +inf as missing
with pd.option_context("mode.use_inf_as_na", True):
assert s.idxmin() == 0
assert np.isnan(s.idxmin(skipna=False))
assert s.idxmax() == 0
np.isnan(s.idxmax(skipna=False))
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
# Using old-style behavior that treats floating point nan, -inf, and
# +inf as missing
with pd.option_context("mode.use_inf_as_na", True):
assert s.idxmin() == 0
assert np.isnan(s.idxmin(skipna=False))
assert s.idxmax() == 0
np.isnan(s.idxmax(skipna=False))

def test_sum_uint64(self):
# GH 53401
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/series/methods/test_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Categorical,
Series,
)
import pandas._testing as tm


class TestSeriesCount:
Expand All @@ -15,9 +16,13 @@ def test_count(self, datetime_series):

assert datetime_series.count() == np.isfinite(datetime_series).sum()

def test_count_inf_as_na(self):
# GH#29478
with pd.option_context("use_inf_as_na", True):
assert Series([pd.Timestamp("1990/1/1")]).count() == 1
ser = Series([pd.Timestamp("1990/1/1")])
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("use_inf_as_na", True):
assert ser.count() == 1

def test_count_categorical(self):
ser = Series(
Expand Down
Loading