From 8709bb28c96860ecd18adeba6b011f7e9181ef7a Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Tue, 25 Feb 2020 17:21:00 +0000 Subject: [PATCH 01/10] BUG: Fixed bug, where pandas._libs.lib.maybe_convert_objects function improperly handled arrays with bools and NaNs --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/lib.pyx | 2 +- pandas/tests/dtypes/test_inference.py | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 13827e8fc4c33..83a52f9c162df 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -98,6 +98,7 @@ Performance improvements Bug fixes ~~~~~~~~~ +- Fixed bug where :func:`pandas._libs.lib.maybe_convert_objects` improperly handled arrays with bools and NaNs. (:issue:`32146`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1990ef66a6bf1..e2035738756b8 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2279,7 +2279,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return uints else: return ints - elif seen.is_bool: + elif seen.is_bool and not seen.nan_: return bools.view(np.bool_) return objects diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 48ae1f67297af..c9e8ea3883cf1 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -568,6 +568,12 @@ def test_maybe_convert_objects_nullable_integer(self, exp): tm.assert_extension_array_equal(result, exp) + def test_maybe_convert_objects_bool_nan(self): + # GH32146 + arr = pd.Index([True, False, np.nan], dtype=object) + exp = np.array([True, False, np.nan], dtype=object) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr.values, safe=1), exp) + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) From de4cffda59086beec3f20f5ea847affd5a1d0145 Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Wed, 26 Feb 2020 11:04:58 +0000 Subject: [PATCH 02/10] Minor corrections --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/tests/dtypes/test_inference.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 83a52f9c162df..517bee83a3ab3 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -98,7 +98,7 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Fixed bug where :func:`pandas._libs.lib.maybe_convert_objects` improperly handled arrays with bools and NaNs. (:issue:`32146`) +- Fixed bug, where the Series representation of an object Index with bools and NaNs was wrong. (:issue:`32146`) Categorical ^^^^^^^^^^^ diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index c9e8ea3883cf1..107f32aee2286 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -572,7 +572,8 @@ def test_maybe_convert_objects_bool_nan(self): # GH32146 arr = pd.Index([True, False, np.nan], dtype=object) exp = np.array([True, False, np.nan], dtype=object) - tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr.values, safe=1), exp) + out = lib.maybe_convert_objects(arr.values, safe=1) + tm.assert_numpy_array_equal(out, exp) def test_mixed_dtypes_remain_object_array(self): # GH14956 From a47fb2d31c825a3f7173dfe7385b5726c8fad8f0 Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Thu, 27 Feb 2020 16:49:49 +0000 Subject: [PATCH 03/10] Added value_counts tests --- doc/source/whatsnew/v1.0.2.rst | 1 + doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/tests/dtypes/test_inference.py | 30 +++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index c9031ac1ae9fe..94acffd8164b1 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) +- Fixed bug, where the ``Series`` representation of an object ``Index`` with bools and ``NaN``s was wrong (:issue:`32146`) - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 517bee83a3ab3..f6e379d8c3579 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -98,7 +98,7 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Fixed bug, where the Series representation of an object Index with bools and NaNs was wrong. (:issue:`32146`) +- Categorical ^^^^^^^^^^^ diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 107f32aee2286..4eae1fbe4b63c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -575,6 +575,36 @@ def test_maybe_convert_objects_bool_nan(self): out = lib.maybe_convert_objects(arr.values, safe=1) tm.assert_numpy_array_equal(out, exp) + @pytest.mark.parametrize( + "arr, dropna, exp", + [ + ( + pd.Series([False, True, True, pd.NA]), + False, + pd.Series([2, 1, 1], index=[True, False, pd.NA]), + ), + ( + pd.Series([False, True, True, pd.NA]), + True, + pd.Series([2, 1], index=[True, False]), + ), + ( + pd.Series(range(3), index=[True, False, np.nan]).index, + False, + pd.Series([1, 1, 1], index=[True, False, pd.NA]), + ), + ], + ) + def test_maybe_convert_objects_value_counts(self, arr, dropna, exp): + out = arr.value_counts(dropna=dropna) + tm.assert_series_equal(out, exp) + + def test_maybe_convert_objects_index_representation(self): + arr = pd.Index([True, False, np.nan], dtype=object) + exp = arr.format() + out = ["True", "False", "NaN"] + assert out == exp + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) From b69c69ea92af16c41e71c4fdcc17626164612b0b Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Thu, 27 Feb 2020 16:52:05 +0000 Subject: [PATCH 04/10] Fix typo --- pandas/tests/dtypes/test_inference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 4eae1fbe4b63c..e98bf62e7b35c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -589,9 +589,9 @@ def test_maybe_convert_objects_bool_nan(self): pd.Series([2, 1], index=[True, False]), ), ( - pd.Series(range(3), index=[True, False, np.nan]).index, - False, - pd.Series([1, 1, 1], index=[True, False, pd.NA]), + pd.Series(range(3), index=[True, False, np.nan]).index, + False, + pd.Series([1, 1, 1], index=[True, False, pd.NA]), ), ], ) From a2294ca82023d23afaad2ca6d3de6eb207c3724f Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Thu, 27 Feb 2020 17:27:48 +0000 Subject: [PATCH 05/10] Fix more typos --- doc/source/whatsnew/v1.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index eb0c6461aaa57..57bd098d3bef2 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -24,7 +24,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) -- Fixed bug, where the ``Series`` representation of an object ``Index`` with bools and ``NaN``s was wrong (:issue:`32146`) +- Fixed bug, where the ``Series`` representation of an object ``Index`` with bools and ``NaN`` values was wrong (:issue:`32146`) - .. --------------------------------------------------------------------------- From 28ef7ddb1d1baa2486d5c0f8ccba25c27dccedfa Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Fri, 28 Feb 2020 11:14:02 +0000 Subject: [PATCH 06/10] Move tests to correct locations --- doc/source/whatsnew/v1.1.0.rst | 1 - pandas/tests/dtypes/test_inference.py | 34 ++----------------- pandas/tests/indexes/base_class/test_repr.py | 14 ++++++++ .../tests/series/methods/test_value_counts.py | 26 ++++++++++++++ 4 files changed, 42 insertions(+), 33 deletions(-) create mode 100644 pandas/tests/indexes/base_class/test_repr.py diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 965fd8a7c0023..2b64b85863def 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -123,7 +123,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Categorical ^^^^^^^^^^^ diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index e98bf62e7b35c..abc6513d02e0a 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -570,41 +570,11 @@ def test_maybe_convert_objects_nullable_integer(self, exp): def test_maybe_convert_objects_bool_nan(self): # GH32146 - arr = pd.Index([True, False, np.nan], dtype=object) + ind = pd.Index([True, False, np.nan], dtype=object) exp = np.array([True, False, np.nan], dtype=object) - out = lib.maybe_convert_objects(arr.values, safe=1) + out = lib.maybe_convert_objects(ind.values, safe=1) tm.assert_numpy_array_equal(out, exp) - @pytest.mark.parametrize( - "arr, dropna, exp", - [ - ( - pd.Series([False, True, True, pd.NA]), - False, - pd.Series([2, 1, 1], index=[True, False, pd.NA]), - ), - ( - pd.Series([False, True, True, pd.NA]), - True, - pd.Series([2, 1], index=[True, False]), - ), - ( - pd.Series(range(3), index=[True, False, np.nan]).index, - False, - pd.Series([1, 1, 1], index=[True, False, pd.NA]), - ), - ], - ) - def test_maybe_convert_objects_value_counts(self, arr, dropna, exp): - out = arr.value_counts(dropna=dropna) - tm.assert_series_equal(out, exp) - - def test_maybe_convert_objects_index_representation(self): - arr = pd.Index([True, False, np.nan], dtype=object) - exp = arr.format() - out = ["True", "False", "NaN"] - assert out == exp - def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) diff --git a/pandas/tests/indexes/base_class/test_repr.py b/pandas/tests/indexes/base_class/test_repr.py new file mode 100644 index 0000000000000..bc11540ff467d --- /dev/null +++ b/pandas/tests/indexes/base_class/test_repr.py @@ -0,0 +1,14 @@ +import numpy as np + +from pandas import Index + + +class TestIndexRepr: + # Tests for the Index representation, specifically for the case that includes bools and NANs + + def test_index_repr_bool_nan(self): + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + exp = arr.format() + out = ["True", "False", "NaN"] + assert out == exp diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index fdb35befeb0c2..f97362ce9c2a9 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas import Categorical, CategoricalIndex, Series @@ -177,3 +178,28 @@ def test_value_counts_categorical_with_nan(self): exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) res = ser.value_counts(dropna=False, sort=False) tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize( + "ser, dropna, exp", + [ + ( + pd.Series([False, True, True, pd.NA]), + False, + pd.Series([2, 1, 1], index=[True, False, pd.NA]), + ), + ( + pd.Series([False, True, True, pd.NA]), + True, + pd.Series([2, 1], index=[True, False]), + ), + ( + pd.Series(range(3), index=[True, False, np.nan]).index, + False, + pd.Series([1, 1, 1], index=[True, False, pd.NA]), + ), + ], + ) + def test_value_counts_bool_with_nan(self, ser, dropna, exp): + # GH32146 + out = ser.value_counts(dropna=dropna) + tm.assert_series_equal(out, exp) From 51f5b4f7ed8a1c8a5acd2af5a0b5966ce4402a44 Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Fri, 28 Feb 2020 11:38:10 +0000 Subject: [PATCH 07/10] Fix linting --- pandas/tests/indexes/base_class/test_repr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/base_class/test_repr.py b/pandas/tests/indexes/base_class/test_repr.py index bc11540ff467d..6c080da283f9e 100644 --- a/pandas/tests/indexes/base_class/test_repr.py +++ b/pandas/tests/indexes/base_class/test_repr.py @@ -4,7 +4,8 @@ class TestIndexRepr: - # Tests for the Index representation, specifically for the case that includes bools and NANs + # Tests for the Index representation, + # specifically for the case that includes bools and NANs def test_index_repr_bool_nan(self): # GH32146 From 893cbf762a05566c94d9982069bf9053f2f6361f Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Tue, 3 Mar 2020 10:02:00 +0000 Subject: [PATCH 08/10] Move test --- pandas/tests/indexes/base_class/test_repr.py | 15 --------------- pandas/tests/indexes/test_base.py | 9 +++++++++ 2 files changed, 9 insertions(+), 15 deletions(-) delete mode 100644 pandas/tests/indexes/base_class/test_repr.py diff --git a/pandas/tests/indexes/base_class/test_repr.py b/pandas/tests/indexes/base_class/test_repr.py deleted file mode 100644 index 6c080da283f9e..0000000000000 --- a/pandas/tests/indexes/base_class/test_repr.py +++ /dev/null @@ -1,15 +0,0 @@ -import numpy as np - -from pandas import Index - - -class TestIndexRepr: - # Tests for the Index representation, - # specifically for the case that includes bools and NANs - - def test_index_repr_bool_nan(self): - # GH32146 - arr = Index([True, False, np.nan], dtype=object) - exp = arr.format() - out = ["True", "False", "NaN"] - assert out == exp diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 22f6af2af4aed..917059932334c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2480,6 +2480,15 @@ def test_intersect_str_dates(self): expected = Index([], dtype=object) tm.assert_index_equal(result, expected) + def test_index_repr_bool_nan(self): + # Tests for the Index representation, + # specifically for the case that includes bools and NANs + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + exp = arr.format() + out = ["True", "False", "NaN"] + assert out == exp + class TestIndexUtils: @pytest.mark.parametrize( From 839168b74adda4a4c388105b40f8e8d3adf6d6ff Mon Sep 17 00:00:00 2001 From: Anna Daglis <40292327+AnnaDaglis@users.noreply.github.com> Date: Wed, 4 Mar 2020 12:38:33 +0000 Subject: [PATCH 09/10] Update doc/source/whatsnew/v1.0.2.rst Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v1.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 9d90b3488f3b1..eec471f989037 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -24,7 +24,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) -- Fixed bug, where the ``Series`` representation of an object ``Index`` with bools and ``NaN`` values was wrong (:issue:`32146`) +- Fixed bug in the repr of an object-dtype ``Index`` with bools and missing values (:issue:`32146`) - .. --------------------------------------------------------------------------- From 8a1b8caed9732bb749725ac0e31d81287b040e85 Mon Sep 17 00:00:00 2001 From: Anna Daglis Date: Wed, 4 Mar 2020 12:57:25 +0000 Subject: [PATCH 10/10] Add extra test for object representation --- pandas/tests/indexes/test_base.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f921de6f50143..ee9cc44870c22 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2459,13 +2459,15 @@ def test_intersect_str_dates(self): tm.assert_index_equal(result, expected) def test_index_repr_bool_nan(self): - # Tests for the Index representation, - # specifically for the case that includes bools and NANs # GH32146 arr = Index([True, False, np.nan], dtype=object) - exp = arr.format() - out = ["True", "False", "NaN"] - assert out == exp + exp1 = arr.format() + out1 = ["True", "False", "NaN"] + assert out1 == exp1 + + exp2 = repr(arr) + out2 = "Index([True, False, nan], dtype='object')" + assert out2 == exp2 class TestIndexUtils: