From d1063e6caae28a557760041dc715de302a6eaf4a Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Mon, 29 May 2017 15:43:42 -0400 Subject: [PATCH 1/5] make null lowercase a missing value --- pandas/io/common.py | 2 +- pandas/tests/io/parser/na_values.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index f4e12ea3fb173..1c987f6a9dfc3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -31,7 +31,7 @@ # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = set([ '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', - 'N/A', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' + 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '' ]) try: diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 362837a46f838..6f72ed51d76c6 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -70,7 +70,7 @@ def test_non_string_na_values(self): def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', - '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'NaN', + '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) assert _NA_VALUES == parsers._NA_VALUES nv = len(_NA_VALUES) From dbcf7adddb6738225cde84397bbeb38c27c17870 Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Mon, 29 May 2017 17:01:10 -0400 Subject: [PATCH 2/5] add null lowercase to parsers.pyx --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2def4dc9dcf24..7a6f366d5b1a9 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024 # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', - b'#N/A N/A', b'NA', b'#NA', b'NULL', b'NaN', + b'#N/A N/A', 
b'NA', b'#NA', b'NULL', b'null', b'NaN', b'nan', b''] From 7fb0cec66cca0a523aa91845a39e060af053690b Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Mon, 29 May 2017 19:18:34 -0400 Subject: [PATCH 3/5] added to whatsnew entry --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a6b6d704737bd..5826cdc6a5bdf 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -38,6 +38,7 @@ Other Enhancements - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`) +- The ``na_values`` argument for :func:`read_csv` function now has 'null' (in lower case) as a default value that is interpreted as NaN: (:issue:`16471`) .. _whatsnew_0210.api_breaking: From 2a520a222b23bf1b1949af563238124a65ad1882 Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Tue, 30 May 2017 01:40:07 -0400 Subject: [PATCH 4/5] added 'null' to na_values in io.rst file --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index bca23dd18a0e3..82cb7abde4b38 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -227,7 +227,7 @@ na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA', - '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', ''``. + '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``. 
keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN values are overridden, otherwise they're appended to. From da744381bdeebbdb36b4452e79c82326126b1289 Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Wed, 31 May 2017 18:53:39 -0400 Subject: [PATCH 5/5] whatsnew corrected --- doc/source/whatsnew/v0.21.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5826cdc6a5bdf..41231a8813fa5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -38,8 +38,7 @@ Other Enhancements - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`) -- The ``na_values`` argument for :func:`read_csv` function now has 'null' (in lower case) as a default value that is interpreted as NaN: (:issue:`16471`) - +- :func:`read_csv` has gained 'null' as an additional default missing value (:issue:`16471`) .. _whatsnew_0210.api_breaking: Backwards incompatible API changes