From ed05de718b36f9039aeaa8c9f481b59b99dc0dba Mon Sep 17 00:00:00 2001 From: Chris Filo Gorgolewski Date: Thu, 20 Apr 2017 15:51:35 -0700 Subject: [PATCH 1/9] Added 'n/a' as a missing value (TDD) --- pandas/tests/io/parser/na_values.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 362837a46f838..a5d0219a18e9d 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -70,7 +70,7 @@ def test_non_string_na_values(self): def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', - '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'NaN', + '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) assert _NA_VALUES == parsers._NA_VALUES nv = len(_NA_VALUES) From b82d85004fb55dbc25966dd2d56ce56922cc9a7d Mon Sep 17 00:00:00 2001 From: Chris Filo Gorgolewski Date: Thu, 20 Apr 2017 15:55:38 -0700 Subject: [PATCH 2/9] Adding 'n/a' --- pandas/io/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index f4e12ea3fb173..76586c3f2fc81 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -31,7 +31,7 @@ # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = set([ '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', - 'N/A', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' + 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' ]) try: From c6de730110a7ae132fb48239a4465f818df782a3 Mon Sep 17 00:00:00 2001 From: Chris Filo Gorgolewski Date: Thu, 20 Apr 2017 15:57:56 -0700 Subject: [PATCH 3/9] Adding 'n/a' --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2def4dc9dcf24..8820b1c280fb0 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024 # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', - b'#N/A N/A', b'NA', b'#NA', b'NULL', b'NaN', + b'#N/A N/A', b'n/a', b'NA', b'#NA', b'NULL', b'NaN', b'nan', b''] From 26a5c1f1a45fc2d757811b0504ac8cb84c40feef Mon Sep 17 00:00:00 2001 From: Chris Filo Gorgolewski Date: Thu, 20 Apr 2017 15:59:14 -0700 Subject: [PATCH 4/9] adding 'n/a' --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 9692766505d7a..7bb05d18753fc 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -226,7 +226,7 @@ NA and Missing Data Handling na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: - ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA', + ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', ''``. keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN From 54a806814af93bd3132c3fa1f4eeb29ba1ad934c Mon Sep 17 00:00:00 2001 From: Chris Gorgolewski Date: Thu, 20 Apr 2017 16:16:06 -0700 Subject: [PATCH 5/9] whats new --- doc/source/whatsnew/v0.20.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a0bf2f9b3758a..94c8673c0f83b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -477,6 +477,7 @@ Other Enhancements - The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`) - The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) - The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) +- ``pd.read_csv()`` now treats 'n/a' strings as missing values by default (:issue:`16078`) - ``DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``DataFrame.plot`` can pass the matplotlib 2.0 default color cycle as a single string as color parameter, see `here `__. (:issue:`15516`) - ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) From f1d4c66f864056bfa10ab6d2437427bbbd5ce1ea Mon Sep 17 00:00:00 2001 From: Chris Gorgolewski Date: Fri, 21 Apr 2017 09:17:19 -0700 Subject: [PATCH 6/9] quotes --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 94c8673c0f83b..9800a20a80d05 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -477,7 +477,7 @@ Other Enhancements - The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`) - The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) - The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) -- ``pd.read_csv()`` now treats 'n/a' strings as missing values by default (:issue:`16078`) +- ``pd.read_csv()`` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) - ``DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``DataFrame.plot`` can pass the matplotlib 2.0 default color cycle as a single string as color parameter, see `here `__. (:issue:`15516`) - ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) From 8486f717a4fef82cd14868b9610392af0504cb6e Mon Sep 17 00:00:00 2001 From: Chris Gorgolewski Date: Thu, 18 May 2017 18:38:29 -0700 Subject: [PATCH 7/9] changelog --- doc/source/whatsnew/v0.20.0.txt | 1 - doc/source/whatsnew/v0.21.0.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9800a20a80d05..a0bf2f9b3758a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -477,7 +477,6 @@ Other Enhancements - The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`) - The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) - The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) -- ``pd.read_csv()`` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) - ``DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``DataFrame.plot`` can pass the matplotlib 2.0 default color cycle as a single string as color parameter, see `here `__. (:issue:`15516`) - ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0a3a440ced54f..23bd63acef2a1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -46,6 +46,7 @@ Backwards incompatible API changes - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +- ``pd.read_csv()`` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) .. _whatsnew_0210.api: From 091a6de47952682c2a2f237c5678db87b097bafb Mon Sep 17 00:00:00 2001 From: Chris Gorgolewski Date: Thu, 1 Jun 2017 17:01:56 -0700 Subject: [PATCH 8/9] making flake8 happy --- pandas/tests/io/parser/na_values.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 4abc796acb7eb..170f9d428c9cc 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -70,8 +70,8 @@ def test_non_string_na_values(self): def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', - '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', - 'nan', '-NaN', '-nan', '#N/A N/A', '']) + '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', + 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) assert _NA_VALUES == parsers._NA_VALUES nv = len(_NA_VALUES) From eef689a2b1ad9fa5ddf3aa4ab52804b2c0e2dffb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 1 Jun 2017 20:26:37 -0400 Subject: [PATCH 9/9] codify whatsnew for added na values --- doc/source/whatsnew/v0.21.0.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ed679b0ca7bc7..3dd8bb2ac2de5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -38,7 +38,7 @@ Other Enhancements - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`) -- :func:`read_csv` has gained 'null' as an additional default missing value.(:issue:`16471`) + .. _whatsnew_0210.api_breaking: Backwards incompatible API changes @@ -49,7 +49,8 @@ Backwards incompatible API changes - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) -- ``pd.read_csv()`` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) +- :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) +- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) .. _whatsnew_0210.api: