Skip to content

Commit 9bfe7c8

Browse files
mroeschke authored and pmhatre1 committed
CLN: Remove inf_as_na (pandas-dev#57428)
1 parent 5914d8e commit 9bfe7c8

File tree

20 files changed

+44
-382
lines changed

20 files changed

+44
-382
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ Removal of prior version deprecations/changes
146146
- Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
147147
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
148148
- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
149+
- Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before operating instead (:issue:`51684`)
149150
- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records` (:issue:`51697`)
150151
- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
151152
- Removed support for ``slice`` in :meth:`DataFrame.take` (:issue:`51539`)

pandas/_libs/missing.pxd

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ from numpy cimport (
77
cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)
88
cpdef bint check_na_tuples_nonequal(object left, object right)
99

10-
cpdef bint checknull(object val, bint inf_as_na=*)
11-
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*)
10+
cpdef bint checknull(object val)
11+
cpdef ndarray[uint8_t] isnaobj(ndarray arr)
1212

1313
cdef bint is_null_datetime64(v)
1414
cdef bint is_null_timedelta64(v)

pandas/_libs/missing.pyi

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ def is_matching_na(
1111
) -> bool: ...
1212
def isposinf_scalar(val: object) -> bool: ...
1313
def isneginf_scalar(val: object) -> bool: ...
14-
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
15-
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
14+
def checknull(val: object) -> bool: ...
15+
def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
1616
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...

pandas/_libs/missing.pyx

+3-7
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False
137137
return False
138138

139139

140-
cpdef bint checknull(object val, bint inf_as_na=False):
140+
cpdef bint checknull(object val):
141141
"""
142142
Return boolean describing whether the input is NA-like, defined here as any
143143
of:
@@ -152,8 +152,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
152152
Parameters
153153
----------
154154
val : object
155-
inf_as_na : bool, default False
156-
Whether to treat INF and -INF as NA values.
157155
158156
Returns
159157
-------
@@ -164,8 +162,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
164162
elif util.is_float_object(val) or util.is_complex_object(val):
165163
if val != val:
166164
return True
167-
elif inf_as_na:
168-
return val == INF or val == NEGINF
169165
return False
170166
elif cnp.is_timedelta64_object(val):
171167
return cnp.get_timedelta64_value(val) == NPY_NAT
@@ -184,7 +180,7 @@ cdef bint is_decimal_na(object val):
184180

185181
@cython.wraparound(False)
186182
@cython.boundscheck(False)
187-
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
183+
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
188184
"""
189185
Return boolean mask denoting which elements of a 1-D array are na-like,
190186
according to the criteria defined in `checknull`:
@@ -217,7 +213,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
217213
# equivalents to `val = values[i]`
218214
val = cnp.PyArray_GETITEM(arr, cnp.PyArray_ITER_DATA(it))
219215
cnp.PyArray_ITER_NEXT(it)
220-
is_null = checknull(val, inf_as_na=inf_as_na)
216+
is_null = checknull(val)
221217
# Dereference pointer (set value)
222218
(<uint8_t *>(cnp.PyArray_ITER_DATA(it2)))[0] = <uint8_t>is_null
223219
cnp.PyArray_ITER_NEXT(it2)

pandas/core/config_init.py

-29
Original file line numberDiff line numberDiff line change
@@ -406,35 +406,6 @@ def is_terminal() -> bool:
406406
with cf.config_prefix("mode"):
407407
cf.register_option("sim_interactive", False, tc_sim_interactive_doc)
408408

409-
use_inf_as_na_doc = """
410-
: boolean
411-
True means treat None, NaN, INF, -INF as NA (old way),
412-
False means None and NaN are null, but INF, -INF are not NA
413-
(new way).
414-
415-
This option is deprecated in pandas 2.1.0 and will be removed in 3.0.
416-
"""
417-
418-
# We don't want to start importing everything at the global context level
419-
# or we'll hit circular deps.
420-
421-
422-
def use_inf_as_na_cb(key) -> None:
423-
# TODO(3.0): enforcing this deprecation will close GH#52501
424-
from pandas.core.dtypes.missing import _use_inf_as_na
425-
426-
_use_inf_as_na(key)
427-
428-
429-
with cf.config_prefix("mode"):
430-
cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)
431-
432-
cf.deprecate_option(
433-
# GH#51684
434-
"mode.use_inf_as_na",
435-
"use_inf_as_na option is deprecated and will be removed in a future "
436-
"version. Convert inf values to NaN before operating instead.",
437-
)
438409

439410
# TODO better name?
440411
copy_on_write_doc = """

pandas/core/dtypes/missing.py

+23-94
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from __future__ import annotations
55

66
from decimal import Decimal
7-
from functools import partial
87
from typing import (
98
TYPE_CHECKING,
109
overload,
@@ -13,8 +12,6 @@
1312

1413
import numpy as np
1514

16-
from pandas._config import get_option
17-
1815
from pandas._libs import lib
1916
import pandas._libs.missing as libmissing
2017
from pandas._libs.tslibs import (
@@ -64,8 +61,6 @@
6461
isposinf_scalar = libmissing.isposinf_scalar
6562
isneginf_scalar = libmissing.isneginf_scalar
6663

67-
nan_checker = np.isnan
68-
INF_AS_NA = False
6964
_dtype_object = np.dtype("object")
7065
_dtype_str = np.dtype(str)
7166

@@ -180,95 +175,57 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
180175
isnull = isna
181176

182177

183-
def _isna(obj, inf_as_na: bool = False):
178+
def _isna(obj):
184179
"""
185-
Detect missing values, treating None, NaN or NA as null. Infinite
186-
values will also be treated as null if inf_as_na is True.
180+
Detect missing values, treating None, NaN or NA as null.
187181
188182
Parameters
189183
----------
190184
obj: ndarray or object value
191185
Input array or scalar value.
192-
inf_as_na: bool
193-
Whether to treat infinity as null.
194186
195187
Returns
196188
-------
197189
boolean ndarray or boolean
198190
"""
199191
if is_scalar(obj):
200-
return libmissing.checknull(obj, inf_as_na=inf_as_na)
192+
return libmissing.checknull(obj)
201193
elif isinstance(obj, ABCMultiIndex):
202194
raise NotImplementedError("isna is not defined for MultiIndex")
203195
elif isinstance(obj, type):
204196
return False
205197
elif isinstance(obj, (np.ndarray, ABCExtensionArray)):
206-
return _isna_array(obj, inf_as_na=inf_as_na)
198+
return _isna_array(obj)
207199
elif isinstance(obj, ABCIndex):
208200
# Try to use cached isna, which also short-circuits for integer dtypes
209201
# and avoids materializing RangeIndex._values
210202
if not obj._can_hold_na:
211203
return obj.isna()
212-
return _isna_array(obj._values, inf_as_na=inf_as_na)
204+
return _isna_array(obj._values)
213205

214206
elif isinstance(obj, ABCSeries):
215-
result = _isna_array(obj._values, inf_as_na=inf_as_na)
207+
result = _isna_array(obj._values)
216208
# box
217209
result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)
218210
return result
219211
elif isinstance(obj, ABCDataFrame):
220212
return obj.isna()
221213
elif isinstance(obj, list):
222-
return _isna_array(np.asarray(obj, dtype=object), inf_as_na=inf_as_na)
214+
return _isna_array(np.asarray(obj, dtype=object))
223215
elif hasattr(obj, "__array__"):
224-
return _isna_array(np.asarray(obj), inf_as_na=inf_as_na)
216+
return _isna_array(np.asarray(obj))
225217
else:
226218
return False
227219

228220

229-
def _use_inf_as_na(key) -> None:
230-
"""
231-
Option change callback for na/inf behaviour.
232-
233-
Choose which replacement for numpy.isnan / -numpy.isfinite is used.
234-
235-
Parameters
236-
----------
237-
flag: bool
238-
True means treat None, NaN, INF, -INF as null (old way),
239-
False means None and NaN are null, but INF, -INF are not null
240-
(new way).
241-
242-
Notes
243-
-----
244-
This approach to setting global module values is discussed and
245-
approved here:
246-
247-
* https://stackoverflow.com/questions/4859217/
248-
programmatically-creating-variables-in-python/4859312#4859312
249-
"""
250-
inf_as_na = get_option(key)
251-
globals()["_isna"] = partial(_isna, inf_as_na=inf_as_na)
252-
if inf_as_na:
253-
globals()["nan_checker"] = lambda x: ~np.isfinite(x)
254-
globals()["INF_AS_NA"] = True
255-
else:
256-
globals()["nan_checker"] = np.isnan
257-
globals()["INF_AS_NA"] = False
258-
259-
260-
def _isna_array(
261-
values: ArrayLike, inf_as_na: bool = False
262-
) -> npt.NDArray[np.bool_] | NDFrame:
221+
def _isna_array(values: ArrayLike) -> npt.NDArray[np.bool_] | NDFrame:
263222
"""
264223
Return an array indicating which values of the input array are NaN / NA.
265224
266225
Parameters
267226
----------
268227
values : ndarray or ExtensionArray
269228
The input array whose elements are to be checked.
270-
inf_as_na: bool
271-
Whether or not to treat infinite values as NA.
272229
273230
Returns
274231
-------
@@ -280,73 +237,47 @@ def _isna_array(
280237

281238
if not isinstance(values, np.ndarray):
282239
# i.e. ExtensionArray
283-
if inf_as_na and isinstance(dtype, CategoricalDtype):
284-
result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
285-
else:
286-
# error: Incompatible types in assignment (expression has type
287-
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
288-
# type "ndarray[Any, dtype[bool_]]")
289-
result = values.isna() # type: ignore[assignment]
240+
# error: Incompatible types in assignment (expression has type
241+
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
242+
# type "ndarray[Any, dtype[bool_]]")
243+
result = values.isna() # type: ignore[assignment]
290244
elif isinstance(values, np.rec.recarray):
291245
# GH 48526
292-
result = _isna_recarray_dtype(values, inf_as_na=inf_as_na)
246+
result = _isna_recarray_dtype(values)
293247
elif is_string_or_object_np_dtype(values.dtype):
294-
result = _isna_string_dtype(values, inf_as_na=inf_as_na)
248+
result = _isna_string_dtype(values)
295249
elif dtype.kind in "mM":
296250
# this is the NaT pattern
297251
result = values.view("i8") == iNaT
298252
else:
299-
if inf_as_na:
300-
result = ~np.isfinite(values)
301-
else:
302-
result = np.isnan(values)
253+
result = np.isnan(values)
303254

304255
return result
305256

306257

307-
def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
258+
def _isna_string_dtype(values: np.ndarray) -> npt.NDArray[np.bool_]:
308259
# Working around NumPy ticket 1542
309260
dtype = values.dtype
310261

311262
if dtype.kind in ("S", "U"):
312263
result = np.zeros(values.shape, dtype=bool)
313264
else:
314265
if values.ndim in {1, 2}:
315-
result = libmissing.isnaobj(values, inf_as_na=inf_as_na)
266+
result = libmissing.isnaobj(values)
316267
else:
317268
# 0-D, reached via e.g. mask_missing
318-
result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
269+
result = libmissing.isnaobj(values.ravel())
319270
result = result.reshape(values.shape)
320271

321272
return result
322273

323274

324-
def _has_record_inf_value(record_as_array: np.ndarray) -> np.bool_:
325-
is_inf_in_record = np.zeros(len(record_as_array), dtype=bool)
326-
for i, value in enumerate(record_as_array):
327-
is_element_inf = False
328-
try:
329-
is_element_inf = np.isinf(value)
330-
except TypeError:
331-
is_element_inf = False
332-
is_inf_in_record[i] = is_element_inf
333-
334-
return np.any(is_inf_in_record)
335-
336-
337-
def _isna_recarray_dtype(
338-
values: np.rec.recarray, inf_as_na: bool
339-
) -> npt.NDArray[np.bool_]:
275+
def _isna_recarray_dtype(values: np.rec.recarray) -> npt.NDArray[np.bool_]:
340276
result = np.zeros(values.shape, dtype=bool)
341277
for i, record in enumerate(values):
342278
record_as_array = np.array(record.tolist())
343279
does_record_contain_nan = isna_all(record_as_array)
344-
does_record_contain_inf = False
345-
if inf_as_na:
346-
does_record_contain_inf = bool(_has_record_inf_value(record_as_array))
347-
result[i] = np.any(
348-
np.logical_or(does_record_contain_nan, does_record_contain_inf)
349-
)
280+
result[i] = np.any(does_record_contain_nan)
350281

351282
return result
352283

@@ -788,7 +719,7 @@ def isna_all(arr: ArrayLike) -> bool:
788719

789720
dtype = arr.dtype
790721
if lib.is_np_dtype(dtype, "f"):
791-
checker = nan_checker
722+
checker = np.isnan
792723

793724
elif (lib.is_np_dtype(dtype, "mM")) or isinstance(
794725
dtype, (DatetimeTZDtype, PeriodDtype)
@@ -800,9 +731,7 @@ def isna_all(arr: ArrayLike) -> bool:
800731
else:
801732
# error: Incompatible types in assignment (expression has type "Callable[[Any],
802733
# Any]", variable has type "ufunc")
803-
checker = lambda x: _isna_array( # type: ignore[assignment]
804-
x, inf_as_na=INF_AS_NA
805-
)
734+
checker = _isna_array # type: ignore[assignment]
806735

807736
return all(
808737
checker(arr[i : i + chunk_len]).all() for i in range(0, total_len, chunk_len)

pandas/core/generic.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -8102,8 +8102,7 @@ def isna(self) -> Self:
81028102
NA values, such as None or :attr:`numpy.NaN`, get mapped to True
81038103
values.
81048104
Everything else gets mapped to False values. Characters such as empty
8105-
strings ``''`` or :attr:`numpy.inf` are not considered NA values
8106-
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
8105+
strings ``''`` or :attr:`numpy.inf` are not considered NA values.
81078106
81088107
Returns
81098108
-------
@@ -8174,8 +8173,7 @@ def notna(self) -> Self:
81748173
81758174
Return a boolean same-sized object indicating if the values are not NA.
81768175
Non-missing values get mapped to True. Characters such as empty
8177-
strings ``''`` or :attr:`numpy.inf` are not considered NA values
8178-
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
8176+
strings ``''`` or :attr:`numpy.inf` are not considered NA values.
81798177
NA values, such as None or :attr:`numpy.NaN`, get mapped to False
81808178
values.
81818179

pandas/io/formats/format.py

-4
Original file line numberDiff line numberDiff line change
@@ -1205,10 +1205,6 @@ def _format(x):
12051205
return "None"
12061206
elif x is NA:
12071207
return str(NA)
1208-
elif lib.is_float(x) and np.isinf(x):
1209-
# TODO(3.0): this will be unreachable when use_inf_as_na
1210-
# deprecation is enforced
1211-
return str(x)
12121208
elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)):
12131209
return "NaT"
12141210
return self.na_rep

0 commit comments

Comments
 (0)