Skip to content

Commit 2b69700

Browse files
authored
DEPR: use_inf_as_na (#53494)
* DEPR: use_inf_as_na * troubleshoot doc * troubleshoot docbuild * troubleshoot doc * troubleshoot docs
1 parent 21a81e9 commit 2b69700

File tree

17 files changed

+135
-96
lines changed

17 files changed

+135
-96
lines changed

doc/source/user_guide/missing_data.rst

-5
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,6 @@ detect this value with data of different types: floating point, integer,
2929
boolean, and general object. In many cases, however, the Python ``None`` will
3030
arise and we wish to also consider that "missing" or "not available" or "NA".
3131

32-
.. note::
33-
34-
If you want to consider ``inf`` and ``-inf`` to be "NA" in computations,
35-
you can set ``pandas.options.mode.use_inf_as_na = True``.
36-
3732
.. _missing.isna:
3833

3934
.. ipython:: python

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ Deprecations
275275
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
276276
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
277277
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
278+
- Deprecated option ``mode.use_inf_as_na``; convert inf entries to ``NaN`` beforehand instead (:issue:`51684`)
278279
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
279280

280281
.. ---------------------------------------------------------------------------

pandas/_config/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ def __init__(self, *args) -> None:
438438
self.ops = list(zip(args[::2], args[1::2]))
439439

440440
def __enter__(self) -> None:
441-
self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops]
441+
self.undo = [(pat, _get_option(pat)) for pat, val in self.ops]
442442

443443
for pat, val in self.ops:
444444
_set_option(pat, val, silent=True)

pandas/core/config_init.py

+8
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,8 @@ def is_terminal() -> bool:
411411
True means treat None, NaN, INF, -INF as NA (old way),
412412
False means None and NaN are null, but INF, -INF are not NA
413413
(new way).
414+
415+
This option is deprecated in pandas 2.1.0 and will be removed in 3.0.
414416
"""
415417

416418
# We don't want to start importing everything at the global context level
@@ -426,6 +428,12 @@ def use_inf_as_na_cb(key) -> None:
426428
with cf.config_prefix("mode"):
427429
cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)
428430

431+
cf.deprecate_option(
432+
# GH#51684
433+
"mode.use_inf_as_na",
434+
"use_inf_as_na option is deprecated and will be removed in a future "
435+
"version. Convert inf values to NaN before operating instead.",
436+
)
429437

430438
data_manager_doc = """
431439
: string

pandas/core/frame.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -10740,8 +10740,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
1074010740
"""
1074110741
Count non-NA cells for each column or row.
1074210742
10743-
The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending
10744-
on `pandas.options.mode.use_inf_as_na`) are considered NA.
10743+
The values `None`, `NaN`, `NaT`, and ``pandas.NA`` are considered NA.
1074510744
1074610745
Parameters
1074710746
----------

pandas/core/indexes/base.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -2820,8 +2820,7 @@ def isna(self) -> npt.NDArray[np.bool_]:
28202820
NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
28212821
mapped to ``True`` values.
28222822
Everything else gets mapped to ``False`` values. Characters such as
2823-
empty strings `''` or :attr:`numpy.inf` are not considered NA values
2824-
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
2823+
empty strings `''` or :attr:`numpy.inf` are not considered NA values.
28252824
28262825
Returns
28272826
-------
@@ -2876,8 +2875,7 @@ def notna(self) -> npt.NDArray[np.bool_]:
28762875
28772876
Return a boolean same-sized object indicating if the values are not NA.
28782877
Non-missing values get mapped to ``True``. Characters such as empty
2879-
strings ``''`` or :attr:`numpy.inf` are not considered NA values
2880-
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
2878+
strings ``''`` or :attr:`numpy.inf` are not considered NA values.
28812879
NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
28822880
values.
28832881

pandas/tests/arrays/categorical/test_missing.py

+23-19
Original file line numberDiff line numberDiff line change
@@ -137,18 +137,20 @@ def test_fillna_array(self):
137137
)
138138
def test_use_inf_as_na(self, values, expected):
139139
# https://github.com/pandas-dev/pandas/issues/33594
140-
with pd.option_context("mode.use_inf_as_na", True):
141-
cat = Categorical(values)
142-
result = cat.isna()
143-
tm.assert_numpy_array_equal(result, expected)
140+
msg = "use_inf_as_na option is deprecated"
141+
with tm.assert_produces_warning(FutureWarning, match=msg):
142+
with pd.option_context("mode.use_inf_as_na", True):
143+
cat = Categorical(values)
144+
result = cat.isna()
145+
tm.assert_numpy_array_equal(result, expected)
144146

145-
result = Series(cat).isna()
146-
expected = Series(expected)
147-
tm.assert_series_equal(result, expected)
147+
result = Series(cat).isna()
148+
expected = Series(expected)
149+
tm.assert_series_equal(result, expected)
148150

149-
result = DataFrame(cat).isna()
150-
expected = DataFrame(expected)
151-
tm.assert_frame_equal(result, expected)
151+
result = DataFrame(cat).isna()
152+
expected = DataFrame(expected)
153+
tm.assert_frame_equal(result, expected)
152154

153155
@pytest.mark.parametrize(
154156
"values, expected",
@@ -164,17 +166,19 @@ def test_use_inf_as_na_outside_context(self, values, expected):
164166
# Using isna directly for Categorical will fail in general here
165167
cat = Categorical(values)
166168

167-
with pd.option_context("mode.use_inf_as_na", True):
168-
result = isna(cat)
169-
tm.assert_numpy_array_equal(result, expected)
169+
msg = "use_inf_as_na option is deprecated"
170+
with tm.assert_produces_warning(FutureWarning, match=msg):
171+
with pd.option_context("mode.use_inf_as_na", True):
172+
result = isna(cat)
173+
tm.assert_numpy_array_equal(result, expected)
170174

171-
result = isna(Series(cat))
172-
expected = Series(expected)
173-
tm.assert_series_equal(result, expected)
175+
result = isna(Series(cat))
176+
expected = Series(expected)
177+
tm.assert_series_equal(result, expected)
174178

175-
result = isna(DataFrame(cat))
176-
expected = DataFrame(expected)
177-
tm.assert_frame_equal(result, expected)
179+
result = isna(DataFrame(cat))
180+
expected = DataFrame(expected)
181+
tm.assert_frame_equal(result, expected)
178182

179183
@pytest.mark.parametrize(
180184
"a1, a2, categories",

pandas/tests/arrays/string_/test_string.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -492,17 +492,19 @@ def test_value_counts_with_normalize(dtype):
492492
def test_use_inf_as_na(values, expected, dtype):
493493
# https://github.com/pandas-dev/pandas/issues/33655
494494
values = pd.array(values, dtype=dtype)
495-
with pd.option_context("mode.use_inf_as_na", True):
496-
result = values.isna()
497-
tm.assert_numpy_array_equal(result, expected)
498-
499-
result = pd.Series(values).isna()
500-
expected = pd.Series(expected)
501-
tm.assert_series_equal(result, expected)
502-
503-
result = pd.DataFrame(values).isna()
504-
expected = pd.DataFrame(expected)
505-
tm.assert_frame_equal(result, expected)
495+
msg = "use_inf_as_na option is deprecated"
496+
with tm.assert_produces_warning(FutureWarning, match=msg):
497+
with pd.option_context("mode.use_inf_as_na", True):
498+
result = values.isna()
499+
tm.assert_numpy_array_equal(result, expected)
500+
501+
result = pd.Series(values).isna()
502+
expected = pd.Series(expected)
503+
tm.assert_series_equal(result, expected)
504+
505+
result = pd.DataFrame(values).isna()
506+
expected = pd.DataFrame(expected)
507+
tm.assert_frame_equal(result, expected)
506508

507509

508510
def test_memory_usage(dtype):

pandas/tests/dtypes/test_missing.py

+23-16
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,24 @@ def test_notna_notnull(notna_f):
5353
assert not notna_f(None)
5454
assert not notna_f(np.NaN)
5555

56-
with cf.option_context("mode.use_inf_as_na", False):
57-
assert notna_f(np.inf)
58-
assert notna_f(-np.inf)
56+
msg = "use_inf_as_na option is deprecated"
57+
with tm.assert_produces_warning(FutureWarning, match=msg):
58+
with cf.option_context("mode.use_inf_as_na", False):
59+
assert notna_f(np.inf)
60+
assert notna_f(-np.inf)
5961

60-
arr = np.array([1.5, np.inf, 3.5, -np.inf])
61-
result = notna_f(arr)
62-
assert result.all()
62+
arr = np.array([1.5, np.inf, 3.5, -np.inf])
63+
result = notna_f(arr)
64+
assert result.all()
6365

64-
with cf.option_context("mode.use_inf_as_na", True):
65-
assert not notna_f(np.inf)
66-
assert not notna_f(-np.inf)
66+
with tm.assert_produces_warning(FutureWarning, match=msg):
67+
with cf.option_context("mode.use_inf_as_na", True):
68+
assert not notna_f(np.inf)
69+
assert not notna_f(-np.inf)
6770

68-
arr = np.array([1.5, np.inf, 3.5, -np.inf])
69-
result = notna_f(arr)
70-
assert result.sum() == 2
71+
arr = np.array([1.5, np.inf, 3.5, -np.inf])
72+
result = notna_f(arr)
73+
assert result.sum() == 2
7174

7275

7376
@pytest.mark.parametrize("null_func", [notna, notnull, isna, isnull])
@@ -82,8 +85,10 @@ def test_notna_notnull(notna_f):
8285
],
8386
)
8487
def test_null_check_is_series(null_func, ser):
85-
with cf.option_context("mode.use_inf_as_na", False):
86-
assert isinstance(null_func(ser), Series)
88+
msg = "use_inf_as_na option is deprecated"
89+
with tm.assert_produces_warning(FutureWarning, match=msg):
90+
with cf.option_context("mode.use_inf_as_na", False):
91+
assert isinstance(null_func(ser), Series)
8792

8893

8994
class TestIsNA:
@@ -214,8 +219,10 @@ def test_isna_old_datetimelike(self):
214219
objs = [dta, dta.tz_localize("US/Eastern"), dta - dta, dta.to_period("D")]
215220

216221
for obj in objs:
217-
with cf.option_context("mode.use_inf_as_na", True):
218-
result = isna(obj)
222+
msg = "use_inf_as_na option is deprecated"
223+
with tm.assert_produces_warning(FutureWarning, match=msg):
224+
with cf.option_context("mode.use_inf_as_na", True):
225+
result = isna(obj)
219226

220227
tm.assert_numpy_array_equal(result, expected)
221228

pandas/tests/extension/base/missing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ def test_fillna_fill_other(self, data):
154154
def test_use_inf_as_na_no_effect(self, data_missing):
155155
ser = pd.Series(data_missing)
156156
expected = ser.isna()
157-
with pd.option_context("mode.use_inf_as_na", True):
158-
result = ser.isna()
157+
msg = "use_inf_as_na option is deprecated"
158+
with tm.assert_produces_warning(FutureWarning, match=msg):
159+
with pd.option_context("mode.use_inf_as_na", True):
160+
result = ser.isna()
159161
self.assert_series_equal(result, expected)

pandas/tests/frame/methods/test_dtypes.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,12 @@ def test_dtypes_gh8722(self, float_string_frame):
9696
tm.assert_series_equal(result, expected)
9797

9898
# compat, GH 8722
99-
with option_context("use_inf_as_na", True):
100-
df = DataFrame([[1]])
101-
result = df.dtypes
102-
tm.assert_series_equal(result, Series({0: np.dtype("int64")}))
99+
msg = "use_inf_as_na option is deprecated"
100+
with tm.assert_produces_warning(FutureWarning, match=msg):
101+
with option_context("use_inf_as_na", True):
102+
df = DataFrame([[1]])
103+
result = df.dtypes
104+
tm.assert_series_equal(result, Series({0: np.dtype("int64")}))
103105

104106
def test_dtypes_timedeltas(self):
105107
df = DataFrame(

pandas/tests/frame/methods/test_sort_index.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -769,8 +769,10 @@ def test_sort_index_use_inf_as_na(self):
769769
{"col1": [1, 2, 3], "col2": [3, 4, 5]},
770770
index=pd.date_range("2020", periods=3),
771771
)
772-
with pd.option_context("mode.use_inf_as_na", True):
773-
result = expected.sort_index()
772+
msg = "use_inf_as_na option is deprecated"
773+
with tm.assert_produces_warning(FutureWarning, match=msg):
774+
with pd.option_context("mode.use_inf_as_na", True):
775+
result = expected.sort_index()
774776
tm.assert_frame_equal(result, expected)
775777

776778
@pytest.mark.parametrize(

pandas/tests/frame/test_repr_info.py

+20-14
Original file line numberDiff line numberDiff line change
@@ -400,30 +400,36 @@ def test_to_records_with_na_record(self):
400400

401401
def test_to_records_with_inf_as_na_record(self):
402402
# GH 48526
403-
with option_context("use_inf_as_na", True):
404-
df = DataFrame(
405-
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf]
406-
)
407-
df["record"] = df[[np.nan, np.inf]].to_records()
408-
expected = """ NaN inf record
403+
expected = """ NaN inf record
409404
0 NaN b [0, inf, b]
410405
1 NaN NaN [1, nan, nan]
411406
2 e f [2, e, f]"""
412-
result = repr(df)
407+
msg = "use_inf_as_na option is deprecated"
408+
with tm.assert_produces_warning(FutureWarning, match=msg):
409+
with option_context("use_inf_as_na", True):
410+
df = DataFrame(
411+
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]],
412+
columns=[np.nan, np.inf],
413+
)
414+
df["record"] = df[[np.nan, np.inf]].to_records()
415+
result = repr(df)
413416
assert result == expected
414417

415418
def test_to_records_with_inf_record(self):
416419
# GH 48526
417-
with option_context("use_inf_as_na", False):
418-
df = DataFrame(
419-
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf]
420-
)
421-
df["record"] = df[[np.nan, np.inf]].to_records()
422-
expected = """ NaN inf record
420+
expected = """ NaN inf record
423421
0 inf b [0, inf, b]
424422
1 NaN NaN [1, nan, nan]
425423
2 e f [2, e, f]"""
426-
result = repr(df)
424+
msg = "use_inf_as_na option is deprecated"
425+
with tm.assert_produces_warning(FutureWarning, match=msg):
426+
with option_context("use_inf_as_na", False):
427+
df = DataFrame(
428+
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]],
429+
columns=[np.nan, np.inf],
430+
)
431+
df["record"] = df[[np.nan, np.inf]].to_records()
432+
result = repr(df)
427433
assert result == expected
428434

429435
def test_masked_ea_with_formatter(self):

pandas/tests/io/parser/common/test_inf.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ def test_read_csv_with_use_inf_as_na(all_parsers):
6262
# https://github.com/pandas-dev/pandas/issues/35493
6363
parser = all_parsers
6464
data = "1.0\nNaN\n3.0"
65-
with option_context("use_inf_as_na", True):
66-
result = parser.read_csv(StringIO(data), header=None)
65+
msg = "use_inf_as_na option is deprecated"
66+
with tm.assert_produces_warning(FutureWarning, match=msg):
67+
with option_context("use_inf_as_na", True):
68+
result = parser.read_csv(StringIO(data), header=None)
6769
expected = DataFrame([1.0, np.nan, 3.0])
6870
tm.assert_frame_equal(result, expected)

pandas/tests/reductions/test_reductions.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -562,8 +562,10 @@ def test_sum_inf(self):
562562
arr = np.random.randn(100, 100).astype("f4")
563563
arr[:, 2] = np.inf
564564

565-
with pd.option_context("mode.use_inf_as_na", True):
566-
tm.assert_almost_equal(s.sum(), s2.sum())
565+
msg = "use_inf_as_na option is deprecated"
566+
with tm.assert_produces_warning(FutureWarning, match=msg):
567+
with pd.option_context("mode.use_inf_as_na", True):
568+
tm.assert_almost_equal(s.sum(), s2.sum())
567569

568570
res = nanops.nansum(arr, axis=1)
569571
assert np.isinf(res).all()
@@ -1102,13 +1104,15 @@ def test_idxminmax_with_inf(self):
11021104
assert s.idxmax() == 2
11031105
assert np.isnan(s.idxmax(skipna=False))
11041106

1105-
# Using old-style behavior that treats floating point nan, -inf, and
1106-
# +inf as missing
1107-
with pd.option_context("mode.use_inf_as_na", True):
1108-
assert s.idxmin() == 0
1109-
assert np.isnan(s.idxmin(skipna=False))
1110-
assert s.idxmax() == 0
1111-
np.isnan(s.idxmax(skipna=False))
1107+
msg = "use_inf_as_na option is deprecated"
1108+
with tm.assert_produces_warning(FutureWarning, match=msg):
1109+
# Using old-style behavior that treats floating point nan, -inf, and
1110+
# +inf as missing
1111+
with pd.option_context("mode.use_inf_as_na", True):
1112+
assert s.idxmin() == 0
1113+
assert np.isnan(s.idxmin(skipna=False))
1114+
assert s.idxmax() == 0
1115+
np.isnan(s.idxmax(skipna=False))
11121116

11131117
def test_sum_uint64(self):
11141118
# GH 53401

pandas/tests/series/methods/test_count.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
Categorical,
66
Series,
77
)
8+
import pandas._testing as tm
89

910

1011
class TestSeriesCount:
@@ -15,9 +16,13 @@ def test_count(self, datetime_series):
1516

1617
assert datetime_series.count() == np.isfinite(datetime_series).sum()
1718

19+
def test_count_inf_as_na(self):
1820
# GH#29478
19-
with pd.option_context("use_inf_as_na", True):
20-
assert Series([pd.Timestamp("1990/1/1")]).count() == 1
21+
ser = Series([pd.Timestamp("1990/1/1")])
22+
msg = "use_inf_as_na option is deprecated"
23+
with tm.assert_produces_warning(FutureWarning, match=msg):
24+
with pd.option_context("use_inf_as_na", True):
25+
assert ser.count() == 1
2126

2227
def test_count_categorical(self):
2328
ser = Series(

0 commit comments

Comments
 (0)