From e605559d3e5ec74d198ceff36af39e83699b049a Mon Sep 17 00:00:00 2001
From: ganevgv
Date: Thu, 7 Nov 2019 01:44:19 +0000
Subject: [PATCH 1/5] add test for df where with category dtype

---
 pandas/tests/frame/test_dtypes.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 68844aeeb081e..05f62a79a7b67 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -815,6 +815,19 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype):
         expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
         tm.assert_frame_equal(result, expected)
 
+    def test_df_where_with_category(self):
+        # GH 16979
+        df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
+        mask = np.array([[True, False, True], [False, True, True]])
+        # change type to category
+        df.A = df.A.astype("category")
+        df.B = df.B.astype("category")
+        df.C = df.C.astype("category")
+
+        expected = df.A.where(mask[:, 0])
+        result = df.A.where(mask[:, 0], other=None)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize(
         "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"]
     )

From 062d281380c3dfa9e891f1981efeb89b867d6e96 Mon Sep 17 00:00:00 2001
From: ganevgv
Date: Thu, 7 Nov 2019 22:21:17 +0000
Subject: [PATCH 2/5] use parameterization

---
 pandas/tests/frame/test_dtypes.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 05f62a79a7b67..05c07253b15c3 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -815,7 +815,8 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype):
         expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
         tm.assert_frame_equal(result, expected)
 
-    def test_df_where_with_category(self):
+    @pytest.mark.parametrize("kwargs", [dict(), dict(other=None)])
+    def test_df_where_with_category(self, kwargs):
         # GH 16979
         df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
         mask = np.array([[True, False, True], [False, True, True]])
@@ -824,8 +825,8 @@ def test_df_where_with_category(self):
         df.B = df.B.astype("category")
         df.C = df.C.astype("category")
 
-        expected = df.A.where(mask[:, 0])
-        result = df.A.where(mask[:, 0], other=None)
+        result = df.A.where(mask[:, 0], **kwargs)
+        expected = Series(pd.Categorical([0, np.nan], categories=[0, 3]), name="A")
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(

From c7913ab6e9b2ceb155c26dec9b39e0ebc7b08451 Mon Sep 17 00:00:00 2001
From: ganevgv
Date: Thu, 7 Nov 2019 23:47:44 +0000
Subject: [PATCH 3/5] add new test for astype category

---
 pandas/tests/frame/test_dtypes.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 05c07253b15c3..1e9d175c4b907 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -815,6 +815,29 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype):
         expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
         tm.assert_frame_equal(result, expected)
 
+    def test_df_where_change_dtype(self):
+        # GH 16979
+        df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
+        mask = np.array([[True, False, True], [False, True, True]])
+
+        result = df.where(mask)
+        expected = DataFrame([[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"))
+
+        tm.assert_frame_equal(result, expected)
+
+        # change type to category
+        df.A = df.A.astype("category")
+        df.B = df.B.astype("category")
+        df.C = df.C.astype("category")
+
+        result = df.where(mask)
+        A = pd.Categorical([0, np.nan], categories=[0, 3])
+        B = pd.Categorical([np.nan, 4], categories=[1, 4])
+        C = pd.Categorical([2, 5], categories=[2, 5])
+        expected = DataFrame({"A": A, "B": B, "C": C})
+
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize("kwargs", [dict(), dict(other=None)])
     def test_df_where_with_category(self, kwargs):
         # GH 16979
@@ -827,6 +850,7 @@ def test_df_where_with_category(self, kwargs):
 
         result = df.A.where(mask[:, 0], **kwargs)
         expected = Series(pd.Categorical([0, np.nan], categories=[0, 3]), name="A")
+
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(

From 35e91f9fab6f3a9b3d3523d5935b04de888f6daa Mon Sep 17 00:00:00 2001
From: ganevgv
Date: Fri, 8 Nov 2019 00:58:34 +0000
Subject: [PATCH 4/5] format test

---
 pandas/tests/frame/test_dtypes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 1e9d175c4b907..3bae94990446e 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -843,6 +843,7 @@ def test_df_where_with_category(self, kwargs):
         # GH 16979
         df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
         mask = np.array([[True, False, True], [False, True, True]])
+
         # change type to category
         df.A = df.A.astype("category")
         df.B = df.B.astype("category")

From 602ac0667ae8521124fb278ff282876af72ad1d6 Mon Sep 17 00:00:00 2001
From: ganevgv
Date: Fri, 8 Nov 2019 17:13:41 +0000
Subject: [PATCH 5/5] remove test for astype category

---
 pandas/tests/frame/test_dtypes.py | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 3bae94990446e..c29f5e78b033f 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -815,29 +815,6 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype):
         expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
         tm.assert_frame_equal(result, expected)
 
-    def test_df_where_change_dtype(self):
-        # GH 16979
-        df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
-        mask = np.array([[True, False, True], [False, True, True]])
-
-        result = df.where(mask)
-        expected = DataFrame([[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"))
-
-        tm.assert_frame_equal(result, expected)
-
-        # change type to category
-        df.A = df.A.astype("category")
-        df.B = df.B.astype("category")
-        df.C = df.C.astype("category")
-
-        result = df.where(mask)
-        A = pd.Categorical([0, np.nan], categories=[0, 3])
-        B = pd.Categorical([np.nan, 4], categories=[1, 4])
-        C = pd.Categorical([2, 5], categories=[2, 5])
-        expected = DataFrame({"A": A, "B": B, "C": C})
-
-        tm.assert_frame_equal(result, expected)
-
     @pytest.mark.parametrize("kwargs", [dict(), dict(other=None)])
     def test_df_where_with_category(self, kwargs):
         # GH 16979