Skip to content

Commit 3caf858

Browse files
OlegShteynbuk authored and jreback committed
consolidated the duplicate definitions of NA values (in parsers & IO) (#16589)
1 parent d02ef6f commit 3caf858

File tree

3 files changed

+18
-16
lines changed

3 files changed

+18
-16
lines changed

doc/source/io.rst

+8-7
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,9 @@ NA and Missing Data Handling
225225

226226
na_values : scalar, str, list-like, or dict, default ``None``
227227
Additional strings to recognize as NA/NaN. If dict passed, specific per-column
228-
NA values. By default the following values are interpreted as NaN:
229-
``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA',
230-
'#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``.
228+
NA values. See :ref:`the default NA values <io.navaluesconst>` below
229+
for a list of the values interpreted as NaN by default.
230+
231231
keep_default_na : boolean, default ``True``
232232
If na_values are specified and keep_default_na is ``False`` the default NaN
233233
values are overridden, otherwise they're appended to.
@@ -1030,10 +1030,11 @@ the corresponding equivalent values will also imply a missing value (in this cas
10301030
``[5.0,5]`` are recognized as ``NaN``.
10311031

10321032
To completely override the default values that are recognized as missing, specify ``keep_default_na=False``.
1033-
The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A','N/A', 'NA',
1034-
'#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan']``. Although a 0-length string
1035-
``''`` is not included in the default ``NaN`` values list, it is still treated
1036-
as a missing value.
1033+
1034+
.. _io.navaluesconst:
1035+
1036+
The values recognized as ``NaN`` by default are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A',
1037+
'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
10371038

10381039
.. code-block:: python
10391040

pandas/_libs/parsers.pyx

+8-7
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ from pandas.core.algorithms import take_1d
5050
from pandas.core.dtypes.concat import union_categoricals
5151
from pandas import Index
5252

53+
import pandas.io.common as com
54+
5355
import time
5456
import os
5557

@@ -273,13 +275,6 @@ cdef extern from "parser/io.h":
273275

274276
DEFAULT_CHUNKSIZE = 256 * 1024
275277

276-
# common NA values
277-
# no longer excluding inf representations
278-
# '1.#INF','-1.#INF', '1.#INF000000',
279-
_NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN',
280-
b'#N/A N/A', b'n/a', b'NA', b'#NA', b'NULL', b'null', b'NaN',
281-
b'nan', b'']
282-
283278

284279
cdef class TextReader:
285280
"""
@@ -1380,6 +1375,12 @@ cdef asbytes(object o):
13801375
return str(o)
13811376

13821377

1378+
# common NA values
1379+
# no longer excluding inf representations
1380+
# '1.#INF','-1.#INF', '1.#INF000000',
1381+
_NA_VALUES = _ensure_encoded(list(com._NA_VALUES))
1382+
1383+
13831384
def _is_file_like(obj):
13841385
if PY3:
13851386
import io

pandas/tests/io/parser/na_values.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
from numpy import nan
1010

11-
import pandas.io.parsers as parsers
11+
import pandas.io.common as com
1212
import pandas.util.testing as tm
1313

1414
from pandas import DataFrame, Index, MultiIndex
@@ -72,7 +72,7 @@ def test_default_na_values(self):
7272
_NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',
7373
'#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null',
7474
'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', ''])
75-
assert _NA_VALUES == parsers._NA_VALUES
75+
assert _NA_VALUES == com._NA_VALUES
7676
nv = len(_NA_VALUES)
7777

7878
def f(i, v):

0 commit comments

Comments
 (0)