From e89f065c989e8179a989042db59e9980a089a1c0 Mon Sep 17 00:00:00 2001 From: Joachim Wagner Date: Tue, 1 May 2018 08:18:22 +0100 Subject: [PATCH 1/4] Mention NaN handling in dtype description To achieve preservation and avoid interpretation of string or object dtypes, NaN value interpretation must be switched off. --- pandas/io/parsers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 780aa5d02f598..7414a0cfb9142 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -125,7 +125,8 @@ are duplicate names in the columns. dtype : Type name or dict of column -> type, default None Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} - Use `str` or `object` to preserve and not interpret dtype. + Use `str` or `object` together with passing `keep_default_na=False` and + `na_values` to preserve and not interpret dtype. If converters are specified, they will be applied INSTEAD of dtype conversion. %s From cdcb54bf80e4e6be6522c11c98a5ed3d71c45fef Mon Sep 17 00:00:00 2001 From: Joachim Wagner Date: Tue, 1 May 2018 12:13:44 +0100 Subject: [PATCH 2/4] reduce detail in reference to NaN settings Be less prescriptive about how to use dtype=str, as suggested by @jreback --- pandas/io/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7414a0cfb9142..2c8f98732c92f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -125,8 +125,8 @@ are duplicate names in the columns. dtype : Type name or dict of column -> type, default None Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} - Use `str` or `object` together with passing `keep_default_na=False` and - `na_values` to preserve and not interpret dtype. + Use `str` or `object` together with suitable `na_values` settings + to preserve and not interpret dtype. If converters are specified, they will be applied INSTEAD of dtype conversion. 
%s From e51aef71f181e10c5d8fc9a6385c5ed8984ab754 Mon Sep 17 00:00:00 2001 From: Joachim Wagner Date: Thu, 10 May 2018 15:16:14 +0100 Subject: [PATCH 3/4] mention NaN handling in dtype description Apply changes to the dtype description in pandas/io/parsers.py (mentioning NaN handling) also to io.rst. --- doc/source/io.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index c5b7eff292722..94a9459c57c70 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -168,7 +168,8 @@ General Parsing Configuration dtype : Type name or dict of column -> type, default ``None`` Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}`` - (unsupported with ``engine='python'``). Use `str` or `object` to preserve and + (unsupported with ``engine='python'``). Use `str` or `object` together + with suitable `na_values` settings to preserve and not interpret dtype. .. versionadded:: 0.20.0 support for the Python parser. From 53dd911467b96c5e336b1fa0d0f94c55b406eb93 Mon Sep 17 00:00:00 2001 From: Joachim Wagner Date: Mon, 14 May 2018 10:58:45 +0100 Subject: [PATCH 4/4] Fix rst usage error in io.rst Not sure what role to use with a parameter reference. Using a literal for the moment. --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 94a9459c57c70..aa2484b0cb5c3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -169,7 +169,7 @@ General Parsing Configuration dtype : Type name or dict of column -> type, default ``None`` Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}`` (unsupported with ``engine='python'``). Use `str` or `object` together - with suitable `na_values` settings to preserve and + with suitable ``na_values`` settings to preserve and not interpret dtype. .. versionadded:: 0.20.0 support for the Python parser.