From f7fdf458357a9ef95f0fb25cf008d17ecccd7cb8 Mon Sep 17 00:00:00 2001 From: DANIEL SAXTON Date: Mon, 20 Jan 2020 12:06:16 -0600 Subject: [PATCH 01/23] Add test --- pandas/tests/arrays/test_boolean.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index cc8d0cdcb518d..58328d1154115 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -251,6 +251,13 @@ def test_coerce_to_numpy_array(): np.array(arr, dtype="bool") +def test_to_boolean_array_from_strings(): + result = BooleanArray._from_sequence_of_strings(["True", "False"]) + expected = BooleanArray(np.array([True, False]), np.array([False, False])) + + tm.assert_extension_array_equal(result, expected) + + def test_repr(): df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) expected = " A\n0 True\n1 False\n2 " From 9e5735073e65cda9078ed8fd7c03b62c54aafc9d Mon Sep 17 00:00:00 2001 From: DANIEL SAXTON Date: Mon, 20 Jan 2020 12:06:30 -0600 Subject: [PATCH 02/23] Implement _from_sequence_of_strings --- pandas/core/arrays/boolean.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index eaa17df1235d3..5ffc485844e1d 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -286,6 +286,19 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): values, mask = coerce_to_array(scalars, copy=copy) return BooleanArray(values, mask) + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + def map_string(s): + if s in ["True", "true", "1"]: + return True + elif s in ["False", "false", "0"]: + return False + else: + return s + + scalars = [map_string(x) for x in strings] + return cls._from_sequence(scalars, dtype, copy) + def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: data = self._data.astype("int8") data[self._mask] = -1 From eb591cd14f743d579a725dc22e605274f46b152e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 15:08:31 -0600 Subject: [PATCH 03/23] Add read_csv test --- pandas/tests/arrays/test_boolean.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 58328d1154115..c6de99da28b0f 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -1,3 +1,4 @@ +import io import operator import numpy as np @@ -258,6 +259,15 @@ def test_to_boolean_array_from_strings(): tm.assert_extension_array_equal(result, expected) +def test_boolean_from_csv(): + input_string = "a\nTrue\nFalse\nNA\n" + + result = pd.read_csv(io.StringIO(input_string), dtype="boolean") + expected = pd.DataFrame({"a": pd.array([True, False, None], dtype="boolean")}) + + tm.assert_frame_equal(result, expected) + + def test_repr(): df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) expected = " A\n0 True\n1 False\n2 " From 87ac09b7f7552084ee5f99387b4ccff048657915 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 15:11:18 -0600 Subject: [PATCH 04/23] Add to release note --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3bd86bb02155f..c720070b96969 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -144,7 +144,7 @@ type dedicated to boolean data that can hold missing values. The default ``bool`` data type based on a bool-dtype NumPy array, the column can only hold ``True`` or ``False``, and not missing values. This new :class:`~arrays.BooleanArray` can store missing values as well by keeping track of this in a separate mask. -(:issue:`29555`, :issue:`30095`) +(:issue:`29555`, :issue:`30095`, :issue:`31131`) .. ipython:: python From 51b6ac9ce2f140ef2d8e47720f48163d243bc48c Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 17:12:16 -0600 Subject: [PATCH 05/23] Check for pd.NA --- pandas/core/arrays/boolean.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 5ffc485844e1d..a0443fa22ecd1 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -289,7 +289,9 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): @classmethod def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): def map_string(s): - if s in ["True", "true", "1"]: + if isna(s): + return s + elif s in ["True", "true", "1"]: return True elif s in ["False", "false", "0"]: return False From 6ffb018fdd5210f9593e6d985ccc26c2b2cf85be Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 17:13:56 -0600 Subject: [PATCH 06/23] Update tests --- pandas/tests/arrays/test_boolean.py | 18 ++++++------------ pandas/tests/io/parser/test_dtypes.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index c6de99da28b0f..d521afd4516e0 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -252,22 +252,16 @@ def test_coerce_to_numpy_array(): np.array(arr, dtype="bool") -def test_to_boolean_array_from_strings(): - result = BooleanArray._from_sequence_of_strings(["True", "False"]) - expected = BooleanArray(np.array([True, False]), np.array([False, False])) +@pytest.mark.parametrize( "na_value", [None, np.nan, pd.NA]) +def test_to_boolean_array_from_strings(na_value): + result = BooleanArray._from_sequence_of_strings(["True", "False", na_value]) + expected = BooleanArray( + np.array([True, False, False]), np.array([False, False, True]) + ) tm.assert_extension_array_equal(result, expected) -def test_boolean_from_csv(): - input_string = "a\nTrue\nFalse\nNA\n" - - result = pd.read_csv(io.StringIO(input_string), dtype="boolean") - expected = pd.DataFrame({"a": pd.array([True, False, None], dtype="boolean")}) - - tm.assert_frame_equal(result, expected) - - def test_repr(): df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) expected = " A\n0 True\n1 False\n2 " diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index d08c86bf2ae75..03570c2e29f6b 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -550,3 +550,19 @@ def test_numeric_dtype(all_parsers, dtype): result = parser.read_csv(StringIO(data), header=None, dtype=dtype) tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("null_string", ["NaN", "nan", "NA", "null", "NULL", ""]) +def test_boolean_dtype(all_parsers, null_string): + parser = all_parsers + data = f"a,b\nTrue,False\nTrue,{null_string}\n" + + result = parser.read_csv(StringIO(data), dtype="boolean") + expected = pd.DataFrame( + { + "a": pd.array([True, True], dtype="boolean"), + "b": pd.array([False, None], dtype="boolean"), + } + ) + + tm.assert_frame_equal(result, expected) From 9ddc5b1d01c4a5b895b7760b55a6a0295eaf342f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 17:19:34 -0600 Subject: [PATCH 07/23] Blacken --- pandas/tests/arrays/test_boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index d521afd4516e0..f714db2397ea2 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -252,7 +252,7 @@ def test_coerce_to_numpy_array(): np.array(arr, dtype="bool") -@pytest.mark.parametrize( "na_value", [None, np.nan, pd.NA]) +@pytest.mark.parametrize("na_value", [None, np.nan, pd.NA]) def test_to_boolean_array_from_strings(na_value): result = BooleanArray._from_sequence_of_strings(["True", "False", na_value]) expected = BooleanArray( From 4dd64b8bbf0a69bdcb618090c8d3f75f3c5d0a8e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 17:22:47 -0600 Subject: [PATCH 08/23] Type arguments --- pandas/core/arrays/boolean.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index a0443fa22ecd1..1f47ca30d3398 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,5 +1,5 @@ import numbers -from typing import TYPE_CHECKING, Any, Tuple, Type +from typing import TYPE_CHECKING, Any, List, Tuple, Type import warnings import numpy as np @@ -287,7 +287,9 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): return BooleanArray(values, mask) @classmethod - def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + def _from_sequence_of_strings( + cls, strings: List[str], dtype: str = None, copy: bool = False + ): def map_string(s): if isna(s): return s From d28a6deae4dc48f99fa6074d741273077c4d611f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 17:28:04 -0600 Subject: [PATCH 09/23] Use optional type --- pandas/core/arrays/boolean.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 1f47ca30d3398..730a45af2bb4f 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,5 +1,5 @@ import numbers -from typing import TYPE_CHECKING, Any, List, Tuple, Type +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type import warnings import numpy as np @@ -288,7 +288,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): @classmethod def _from_sequence_of_strings( - cls, strings: List[str], dtype: str = None, copy: bool = False + cls, strings: List[str], dtype: Optional[str] = None, copy: bool = False ): def map_string(s): if isna(s): From 978f22d8cda87ad6e6c0e4db7e5ed5c5dc95d7ea Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 17:50:49 -0600 Subject: [PATCH 10/23] Don't import io --- pandas/tests/arrays/test_boolean.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index f714db2397ea2..8921332228a94 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -1,4 +1,3 @@ -import io import operator import numpy as np From 19f9c1855f482cc6990417ba4d2ce3c937bf1ec7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 17:52:56 -0600 Subject: [PATCH 11/23] Don't type dtype --- pandas/core/arrays/boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 730a45af2bb4f..10440098ed044 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -288,7 +288,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): @classmethod def _from_sequence_of_strings( - cls, strings: List[str], dtype: Optional[str] = None, copy: bool = False + cls, strings: List[str], dtype=None, copy: bool = False ): def map_string(s): if isna(s): From f851b83f241db0df1a17f11b479e4212b9526893 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 18:16:53 -0600 Subject: [PATCH 12/23] Nit --- pandas/tests/io/parser/test_dtypes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 03570c2e29f6b..20936c358dedd 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -552,10 +552,10 @@ def test_numeric_dtype(all_parsers, dtype): tm.assert_frame_equal(expected, result) -@pytest.mark.parametrize("null_string", ["NaN", "nan", "NA", "null", "NULL", ""]) -def test_boolean_dtype(all_parsers, null_string): +@pytest.mark.parametrize("na_string", ["NaN", "nan", "NA", "null", "NULL", ""]) +def test_boolean_dtype(all_parsers, na_string): parser = all_parsers - data = f"a,b\nTrue,False\nTrue,{null_string}\n" + data = f"a,b\nTrue,False\nTrue,{na_string}\n" result = parser.read_csv(StringIO(data), dtype="boolean") expected = pd.DataFrame( From 29dbfa1165311260ffd54afbc6f8208ee27405c6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Jan 2020 18:18:06 -0600 Subject: [PATCH 13/23] Don't import Optional --- pandas/core/arrays/boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 10440098ed044..231b57883595d 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,5 +1,5 @@ import numbers -from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type +from typing import TYPE_CHECKING, Any, List, Tuple, Type import warnings import numpy as np From 0481153fb10aacb4bda5afd8610c12380a057524 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 21 Jan 2020 14:57:52 -0600 Subject: [PATCH 14/23] Change Boolean strings --- pandas/core/arrays/boolean.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 231b57883595d..6b1621cf946ee 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -293,9 +293,9 @@ def _from_sequence_of_strings( def map_string(s): if isna(s): return s - elif s in ["True", "true", "1"]: + elif s in ["True", "TRUE", "true"]: return True - elif s in ["False", "false", "0"]: + elif s in ["False", "FALSE", "false"]: return False else: return s From be0731b4c7cfc91d24318de7dcdf44d12a066a2f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 21 Jan 2020 15:57:15 -0600 Subject: [PATCH 15/23] Parametrize test over true / false strings --- pandas/tests/io/parser/test_dtypes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 20936c358dedd..a22e5d20d4629 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -553,9 +553,11 @@ def test_numeric_dtype(all_parsers, dtype): @pytest.mark.parametrize("na_string", ["NaN", "nan", "NA", "null", "NULL", ""]) -def test_boolean_dtype(all_parsers, na_string): +@pytest.mark.parametrize("true_string", ["True", "TRUE", "true"]) +@pytest.mark.parametrize("false_string", ["False", "FALSE", "false"]) +def test_boolean_dtype(all_parsers, na_string, true_string, false_string): parser = all_parsers - data = f"a,b\nTrue,False\nTrue,{na_string}\n" + data = f"a,b\n{true_string},{false_string}\nTrue,{na_string}\n" result = parser.read_csv(StringIO(data), dtype="boolean") expected = pd.DataFrame( From fdec55d4528fdba4b43bcd47d996bec4dd961c90 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 21 Jan 2020 16:46:43 -0600 Subject: [PATCH 16/23] Remove test parameterization --- pandas/tests/io/parser/test_dtypes.py | 34 ++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index a22e5d20d4629..7dd67e89ffadb 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -552,18 +552,36 @@ def test_numeric_dtype(all_parsers, dtype): tm.assert_frame_equal(expected, result) -@pytest.mark.parametrize("na_string", ["NaN", "nan", "NA", "null", "NULL", ""]) -@pytest.mark.parametrize("true_string", ["True", "TRUE", "true"]) -@pytest.mark.parametrize("false_string", ["False", "FALSE", "false"]) -def test_boolean_dtype(all_parsers, na_string, true_string, false_string): - parser = all_parsers - data = f"a,b\n{true_string},{false_string}\nTrue,{na_string}\n" +def test_boolean_dtype(all_parsers): + parser = all_parsers + data = "\n".join( + [ + "a", + "True", + "TRUE", + "true", + "False", + "FALSE", + "false", + "NaN", + "nan", + "NA", + "null", + "NULL", + ] + ) + + assert all([s in data for s in ["True", "TRUE", "true"]]) + assert all([s in data for s in ["False", "FALSE", "false"]]) + assert all([s in data for s in ["NaN", "nan", "NA", "null", "NULL"]]) result = parser.read_csv(StringIO(data), dtype="boolean") expected = pd.DataFrame( { - "a": pd.array([True, True], dtype="boolean"), - "b": pd.array([False, None], dtype="boolean"), + "a": pd.array( + [True, True, True, False, False, False, None, None, None, None, None], + dtype="boolean", + ) } ) From e2656cb4ae02256c223f2e71b1a0800d4b28cd93 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 22 Jan 2020 11:11:11 -0600 Subject: [PATCH 17/23] Fix linting --- pandas/tests/io/parser/test_dtypes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 7dd67e89ffadb..4baa573b76df1 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -571,9 +571,9 @@ def test_boolean_dtype(all_parsers): ] ) - assert all([s in data for s in ["True", "TRUE", "true"]]) - assert all([s in data for s in ["False", "FALSE", "false"]]) - assert all([s in data for s in ["NaN", "nan", "NA", "null", "NULL"]]) + assert all(s in data for s in ["True", "TRUE", "true"]) + assert all(s in data for s in ["False", "FALSE", "false"]) + assert all(s in data for s in ["NaN", "nan", "NA", "null", "NULL"]) result = parser.read_csv(StringIO(data), dtype="boolean") expected = pd.DataFrame( From 6d06b841ba8df4df58303595d7a6a6dd974d8229 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 23 Jan 2020 08:16:59 -0600 Subject: [PATCH 18/23] Take out assertions --- pandas/tests/io/parser/test_dtypes.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 4baa573b76df1..11dcf7f04f76b 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -571,10 +571,6 @@ def test_boolean_dtype(all_parsers): ] ) - assert all(s in data for s in ["True", "TRUE", "true"]) - assert all(s in data for s in ["False", "FALSE", "false"]) - assert all(s in data for s in ["NaN", "nan", "NA", "null", "NULL"]) - result = parser.read_csv(StringIO(data), dtype="boolean") expected = pd.DataFrame( { From 604b862fc6851044ac232c9296ebd602daa9e0af Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 23 Jan 2020 08:33:56 -0600 Subject: [PATCH 19/23] Take out parameterization --- pandas/tests/arrays/test_boolean.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 8921332228a94..81dda8c672c02 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -251,11 +251,10 @@ def test_coerce_to_numpy_array(): np.array(arr, dtype="bool") -@pytest.mark.parametrize("na_value", [None, np.nan, pd.NA]) def test_to_boolean_array_from_strings(na_value): - result = BooleanArray._from_sequence_of_strings(["True", "False", na_value]) + result = BooleanArray._from_sequence_of_strings(["True", "False"]) expected = BooleanArray( - np.array([True, False, False]), np.array([False, False, True]) + np.array([True, False]), np.array([False, False]) ) tm.assert_extension_array_equal(result, expected) From f2db6edb5b8522d38fb43935fd4c02b1d1ce1ffe Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 23 Jan 2020 09:36:58 -0600 Subject: [PATCH 20/23] Blacken --- pandas/tests/arrays/test_boolean.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 81dda8c672c02..c32757f3aac46 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -253,9 +253,7 @@ def test_coerce_to_numpy_array(): def test_to_boolean_array_from_strings(na_value): result = BooleanArray._from_sequence_of_strings(["True", "False"]) - expected = BooleanArray( - np.array([True, False]), np.array([False, False]) - ) + expected = BooleanArray(np.array([True, False]), np.array([False, False])) tm.assert_extension_array_equal(result, expected) From 9e35b63e2bdc9809de9c7703a0a9f0cf80a32569 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Thu, 23 Jan 2020 09:41:23 -0600 Subject: [PATCH 21/23] Update pandas/tests/arrays/test_boolean.py Co-Authored-By: Joris Van den Bossche --- pandas/tests/arrays/test_boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index c32757f3aac46..438c6d1044f84 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -252,7 +252,7 @@ def test_coerce_to_numpy_array(): def test_to_boolean_array_from_strings(na_value): - result = BooleanArray._from_sequence_of_strings(["True", "False"]) + result = BooleanArray._from_sequence_of_strings(np.array(["True", "False", np.nan], dtype=object)) expected = BooleanArray(np.array([True, False]), np.array([False, False])) tm.assert_extension_array_equal(result, expected) From 71bafbf5e428486bc4d815458c5aa0f122ae7bb1 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 23 Jan 2020 09:58:01 -0600 Subject: [PATCH 22/23] Update tests and raise invalid string error --- pandas/core/arrays/boolean.py | 2 +- pandas/tests/arrays/test_boolean.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 6b1621cf946ee..7b12f3348e7e7 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -298,7 +298,7 @@ def map_string(s): elif s in ["False", "FALSE", "false"]: return False else: - return s + raise ValueError(f"{s} cannot be cast to bool") scalars = [map_string(x) for x in strings] return cls._from_sequence(scalars, dtype, copy) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 438c6d1044f84..d08a534031aed 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -251,13 +251,20 @@ def test_coerce_to_numpy_array(): np.array(arr, dtype="bool") -def test_to_boolean_array_from_strings(na_value): - result = BooleanArray._from_sequence_of_strings(np.array(["True", "False", np.nan], dtype=object)) +def test_to_boolean_array_from_strings(): + result = BooleanArray._from_sequence_of_strings( + np.array(["True", "False", np.nan], dtype=object) + ) expected = BooleanArray(np.array([True, False]), np.array([False, False])) tm.assert_extension_array_equal(result, expected) +def test_to_boolean_array_from_strings_invalid_string(): + with pytest.raises(ValueError, match="cannot be cast"): + BooleanArray._from_sequence_of_strings(["donkey"]) + + def test_repr(): df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) expected = " A\n0 True\n1 False\n2 " From 184a9be5c2483103e4b760df2bbe621c79af7caa Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 23 Jan 2020 11:13:19 -0600 Subject: [PATCH 23/23] Fix test --- pandas/tests/arrays/test_boolean.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index d08a534031aed..465d873f0a2ad 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -255,7 +255,9 @@ def test_to_boolean_array_from_strings(): result = BooleanArray._from_sequence_of_strings( np.array(["True", "False", np.nan], dtype=object) ) - expected = BooleanArray(np.array([True, False]), np.array([False, False])) + expected = BooleanArray( + np.array([True, False, False]), np.array([False, False, True]) + ) tm.assert_extension_array_equal(result, expected)