From 52890005c6e72a9e7f4731f271158485205a1bbb Mon Sep 17 00:00:00 2001 From: Abdurrahmaan Iqbal Date: Wed, 7 Oct 2020 09:10:39 +0100 Subject: [PATCH 01/10] BUG: GH36928 Allow dict_keys to be used as column names by read_csv --- pandas/_libs/lib.pyx | 2 ++ pandas/tests/io/parser/test_common.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 922dcd7e74aa0..b0eda8180a46e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1019,6 +1019,8 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: and not (util.is_array(obj) and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) + # allow dict_keys objects + or isinstance(obj, abc.KeysView) ) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 78c2f2bce5a02..e561bdceb5424 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2221,3 +2221,12 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers): ) with pytest.raises(ValueError, match=msg): parser.read_table(f, delim_whitespace=True, sep=",") + + +def test_dict_keys_as_names(all_parsers): + data = "a,b\n1,2" + + keys = {"a": int, "b": int}.keys() + parser = all_parsers + + parser.read_csv(StringIO(data), names=keys) From 45e10ffb88e9bc4dbc509c7a8f7bce60f0432e76 Mon Sep 17 00:00:00 2001 From: Abdurrahmaan Iqbal Date: Wed, 7 Oct 2020 17:50:50 +0100 Subject: [PATCH 02/10] BUG: GH36928 Add whatsnew entry and move fix to _validate_names function instead of is_list_like --- doc/source/whatsnew/v1.1.4.rst | 2 +- pandas/_libs/lib.pyx | 2 -- pandas/io/parsers.py | 4 +++- pandas/tests/io/parser/test_common.py | 7 +++++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index e63912ebc8fee..4c96abfe45f6b 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- `dict_keys` not being accepted as valid column names by `pandas.io.parsers._validate_names` .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index b0eda8180a46e..922dcd7e74aa0 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1019,8 +1019,6 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: and not (util.is_array(obj) and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) - # allow dict_keys objects - or isinstance(obj, abc.KeysView) ) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index dd3588faedf7a..d671c432c2005 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -420,7 +420,9 @@ def _validate_names(names): if names is not None: if len(names) != len(set(names)): raise ValueError("Duplicate names are not allowed.") - if not is_list_like(names, allow_sets=False): + if not is_list_like(names, allow_sets=False) and not isinstance( + names, abc.KeysView + ): raise ValueError("Names should be an ordered collection.") diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index e561bdceb5424..435ff634a5fad 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2224,9 +2224,12 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers): def test_dict_keys_as_names(all_parsers): - data = "a,b\n1,2" + # GH: 36928 + data = "1,2" keys = {"a": int, "b": int}.keys() parser = all_parsers - parser.read_csv(StringIO(data), names=keys) + result = parser.read_csv(StringIO(data), names=keys) + expected = DataFrame({"a": [1], "b": [2]}) + tm.assert_frame_equal(result, expected) From b2004b83cae49ce47b156ad252dadca351bcaed6 Mon Sep 17 00:00:00 2001 From: Abdurrahmaan Iqbal Date: Wed, 7 Oct 2020 18:11:44 +0100 Subject: [PATCH 03/10] BUG: GH36928 Use two backticks instead of one in RST for correct code formatting --- doc/source/whatsnew/v1.1.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 4c96abfe45f6b..8abff3dc44989 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- `dict_keys` not being accepted as valid column names by `pandas.io.parsers._validate_names` +- ``dict_keys`` not being accepted as valid column names by ``pandas.io.parsers._validate_names`` .. --------------------------------------------------------------------------- From 1871a417d8c49fa09263f5684a89afd97233b319 Mon Sep 17 00:00:00 2001 From: Abdurrahmaan Iqbal Date: Wed, 7 Oct 2020 18:26:58 +0100 Subject: [PATCH 04/10] BUG: GH36928 Improve whatsnew description --- doc/source/whatsnew/v1.1.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 8abff3dc44989..dc9478d2353e3 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- ``dict_keys`` not being accepted as valid column names by ``pandas.io.parsers._validate_names`` +- Fixed regression in :func:`read_csv` raised ``ValueError`` when ``names`` was ``dict_keys`` (:issue:`36928`) .. --------------------------------------------------------------------------- From 89c9d51e0385f712eef33dde03f357e954eb2e7c Mon Sep 17 00:00:00 2001 From: abmyii <52673001+abmyii@users.noreply.github.com> Date: Wed, 7 Oct 2020 19:27:17 +0100 Subject: [PATCH 05/10] Update doc/source/whatsnew/v1.1.4.rst Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index dc9478d2353e3..d38545d5639f5 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- Fixed regression in :func:`read_csv` raised ``ValueError`` when ``names`` was ``dict_keys`` (:issue:`36928`) +- Fixed regression in :func:`read_csv` raising a ``ValueError`` when ``names`` was of type ``dict_keys`` (:issue:`36928`) .. --------------------------------------------------------------------------- From d1650490c0a20f9e1ccf0abc849e5a64cf094a7b Mon Sep 17 00:00:00 2001 From: abmyii <52673001+abmyii@users.noreply.github.com> Date: Thu, 8 Oct 2020 08:16:00 +0100 Subject: [PATCH 06/10] Update test_common.py --- pandas/tests/io/parser/test_common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 64be052cb14e4..86e7caa602a50 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2221,7 +2221,10 @@ def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter): "delim_whitespace=True; you can only specify one." ) with pytest.raises(ValueError, match=msg): - parser.read_table(f, delim_whitespace=True, sep=",") + parser.read_csv(f, delim_whitespace=True, sep=delimiter) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(f, delim_whitespace=True, delimiter=delimiter) @pytest.mark.parametrize("delimiter", [",", "\t"]) From 1c2296cd447f04d7aa2f26a99a3a0f2577b0f6b9 Mon Sep 17 00:00:00 2001 From: abmyii <52673001+abmyii@users.noreply.github.com> Date: Thu, 8 Oct 2020 17:12:26 +0100 Subject: [PATCH 07/10] Update test_common.py --- pandas/tests/io/parser/test_common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 86e7caa602a50..2281811299ab9 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2223,9 +2223,6 @@ def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter): with pytest.raises(ValueError, match=msg): parser.read_csv(f, delim_whitespace=True, sep=delimiter) - with pytest.raises(ValueError, match=msg): - parser.read_csv(f, delim_whitespace=True, delimiter=delimiter) - @pytest.mark.parametrize("delimiter", [",", "\t"]) def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): From b3f18b7a81eec277f4dae7413f14c44a213dc79e Mon Sep 17 00:00:00 2001 From: abmyii <52673001+abmyii@users.noreply.github.com> Date: Thu, 8 Oct 2020 17:19:17 +0100 Subject: [PATCH 08/10] Update test_common.py --- pandas/tests/io/parser/test_common.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 2281811299ab9..f0cef57e4ba1a 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2222,6 +2222,9 @@ def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter): ) with pytest.raises(ValueError, match=msg): parser.read_csv(f, delim_whitespace=True, sep=delimiter) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(f, delim_whitespace=True, delimiter=delimiter) @pytest.mark.parametrize("delimiter", [",", "\t"]) @@ -2250,7 +2253,3 @@ def test_dict_keys_as_names(all_parsers): result = parser.read_csv(StringIO(data), names=keys) expected = DataFrame({"a": [1], "b": [2]}) tm.assert_frame_equal(result, expected) - parser.read_csv(f, delim_whitespace=True, sep=delimiter) - - with pytest.raises(ValueError, match=msg): - parser.read_csv(f, delim_whitespace=True, delimiter=delimiter) From e74274143ca55dc2ac2b84d71452225feb8217ac Mon Sep 17 00:00:00 2001 From: abmyii <52673001+abmyii@users.noreply.github.com> Date: Thu, 8 Oct 2020 17:19:39 +0100 Subject: [PATCH 09/10] Update test_common.py --- pandas/tests/io/parser/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index f0cef57e4ba1a..edf1d780ef107 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2222,7 +2222,7 @@ def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter): ) with pytest.raises(ValueError, match=msg): parser.read_csv(f, delim_whitespace=True, sep=delimiter) - + with pytest.raises(ValueError, match=msg): parser.read_csv(f, delim_whitespace=True, delimiter=delimiter) From 423545cab36eb087f27b92d56f8de65566c616e2 Mon Sep 17 00:00:00 2001 From: Abdurrahmaan Iqbal Date: Thu, 8 Oct 2020 22:03:28 +0100 Subject: [PATCH 10/10] Combine conditions --- pandas/io/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3e8e8fa279235..63c3f9899d915 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -420,8 +420,8 @@ def _validate_names(names): if names is not None: if len(names) != len(set(names)): raise ValueError("Duplicate names are not allowed.") - if not is_list_like(names, allow_sets=False) and not isinstance( - names, abc.KeysView + if not ( + is_list_like(names, allow_sets=False) or isinstance(names, abc.KeysView) ): raise ValueError("Names should be an ordered collection.")