diff --git a/doc/source/io.rst b/doc/source/io.rst index bca23dd18a0e3..82cb7abde4b38 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -227,7 +227,7 @@ na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA', - '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', ''``. + '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``. keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN values are overridden, otherwise they're appended to. diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a6b6d704737bd..41231a8813fa5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -38,7 +38,7 @@ Other Enhancements - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`) - +- :func:`read_csv` has gained ``'null'`` as an additional default missing value. (:issue:`16471`) .. 
_whatsnew_0210.api_breaking: Backwards incompatible API changes diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2def4dc9dcf24..7a6f366d5b1a9 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024 # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', - b'#N/A N/A', b'NA', b'#NA', b'NULL', b'NaN', + b'#N/A N/A', b'NA', b'#NA', b'NULL', b'null', b'NaN', b'nan', b''] diff --git a/pandas/io/common.py b/pandas/io/common.py index f4e12ea3fb173..1c987f6a9dfc3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -31,7 +31,7 @@ # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = set([ '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', - 'N/A', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' + 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '' ]) try: diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 362837a46f838..6f72ed51d76c6 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -70,7 +70,7 @@ def test_non_string_na_values(self): def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', - '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'NaN', + '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) assert _NA_VALUES == parsers._NA_VALUES nv = len(_NA_VALUES)