From e15488e31ea00bda186c61d83437dd47d99d52c8 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 11 Jul 2020 16:30:05 +0000 Subject: [PATCH 01/10] TST add test for dtype consistency with pd replace #23305 --- pandas/tests/test_replace.py | 69 ++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 pandas/tests/test_replace.py diff --git a/pandas/tests/test_replace.py b/pandas/tests/test_replace.py new file mode 100644 index 0000000000000..935099a7a436c --- /dev/null +++ b/pandas/tests/test_replace.py @@ -0,0 +1,69 @@ +import pandas as pd +import pytest + +import pandas._testing as tm + + +@pytest.fixture +def input_dataframe(): + """Create the input dataframe""" + + # create input data + input_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "d"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "cat2", "cat3", "cat4"], + "col5": ["obj1", "obj2", "obj3", "obj4"], + } + + # explicitly cast columns as category and order them + input_df = pd.DataFrame(data=input_dict).astype( + {"col2": "category", "col4": "category"} + ) + input_df["col2"] = input_df["col2"].cat.reorder_categories( + ["a", "b", "c", "d"], ordered=True + ) + input_df["col4"] = input_df["col4"].cat.reorder_categories( + ["cat1", "cat2", "cat3", "cat4"], ordered=True + ) + + return input_df + + +@pytest.fixture +def expected_dataframe(): + """create the expected dataframe""" + + # create expected dataframe + expected_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "z"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "catX", "cat3", "cat4"], + "col5": ["obj9", "obj2", "obj3", "obj4"], + } + + # explicitly cast columns as category and order them + expected_df = pd.DataFrame(data=expected_dict).astype( + {"col2": "category", "col4": "category"} + ) + expected_df["col2"] = expected_df["col2"].cat.reorder_categories( + ["a", "b", "c", "z"], ordered=True + ) + expected_df["col4"] = expected_df["col4"].cat.reorder_categories( + ["cat1", "catX", "cat3", "cat4"], ordered=True + ) + + return expected_df + + +def test_replace_values_scalar(input_dataframe, expected_dataframe): + + # replace values in input dataframe + input_df = input_dataframe.replace("d", "z") + input_df = input_df.replace("obj1", "obj9") + input_df = input_df.replace("cat2", "catX") + + tm.assert_frame_equal(input_df, expected_dataframe) + From 9b699b57736a79d1646e39cc18bd3c0c4c257abd Mon Sep 17 00:00:00 2001 From: root Date: Sat, 11 Jul 2020 16:34:45 +0000 Subject: [PATCH 02/10] TST remove blank to fix pep8 violation #23305 --- pandas/tests/test_replace.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/test_replace.py b/pandas/tests/test_replace.py index 935099a7a436c..da943881d509f 100644 --- a/pandas/tests/test_replace.py +++ b/pandas/tests/test_replace.py @@ -66,4 +66,3 @@ def test_replace_values_scalar(input_dataframe, expected_dataframe): input_df = input_df.replace("cat2", "catX") tm.assert_frame_equal(input_df, expected_dataframe) - From 65b040bab3d71d503dd9e20646dd964ea55cbe9a Mon Sep 17 00:00:00 2001 From: root Date: Sat, 11 Jul 2020 17:24:04 +0000 Subject: [PATCH 03/10] TST: sort imports using isort #23305 --- pandas/tests/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_replace.py b/pandas/tests/test_replace.py index da943881d509f..6e7db0a5a2e43 100644 --- a/pandas/tests/test_replace.py +++ b/pandas/tests/test_replace.py @@ -1,6 +1,6 @@ -import pandas as pd import pytest +import pandas as pd import pandas._testing as tm From 22e07792298d6853dddfae0f6767e774deff785d Mon Sep 17 00:00:00 2001 From: root Date: Sat, 11 Jul 2020 18:15:56 +0000 Subject: [PATCH 04/10] TST: move tests to appropriate location --- pandas/tests/frame/methods/test_replace.py | 85 ++++++++++++++++++++++ pandas/tests/test_replace.py | 68 ----------------- 2 files changed, 85 insertions(+), 68 deletions(-) delete mode 100644 pandas/tests/test_replace.py diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 498f7f7790514..ab5f5337b42aa 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -21,6 +21,64 @@ def mix_abc() -> Dict[str, List[Union[float, str]]]: return {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} +@pytest.fixture +def input_category_df(): + """ + Create the input dataframe with explicit category column + """ + + # create input data + input_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "d"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "cat2", "cat3", "cat4"], + "col5": ["obj1", "obj2", "obj3", "obj4"], + } + + # explicitly cast columns as category and order them + input_df = pd.DataFrame(data=input_dict).astype( + {"col2": "category", "col4": "category"} + ) + input_df["col2"] = input_df["col2"].cat.reorder_categories( + ["a", "b", "c", "d"], ordered=True + ) + input_df["col4"] = input_df["col4"].cat.reorder_categories( + ["cat1", "cat2", "cat3", "cat4"], ordered=True + ) + + return input_df + + +@pytest.fixture +def expected_category_df(): + """ + Create the expected dataframe with explicit category column + """ + + # create expected dataframe + expected_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "z"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "catX", "cat3", "cat4"], + "col5": ["obj9", "obj2", "obj3", "obj4"], + } + + # explicitly cast columns as category and order them + expected_df = pd.DataFrame(data=expected_dict).astype( + {"col2": "category", "col4": "category"} + ) + expected_df["col2"] = expected_df["col2"].cat.reorder_categories( + ["a", "b", "c", "z"], ordered=True + ) + expected_df["col4"] = expected_df["col4"].cat.reorder_categories( + ["cat1", "catX", "cat3", "cat4"], ordered=True + ) + + return expected_df + + class TestDataFrameReplace: def test_replace_inplace(self, datetime_frame, float_string_frame): datetime_frame["A"][:5] = np.nan @@ -1420,3 +1478,30 @@ def test_replace_period_ignore_float(self): result = df.replace(1.0, 0.0) expected = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) tm.assert_frame_equal(expected, result) + + def test_replace_value_category_type(self, input_category_df, expected_category_df): + """ + Test to ensure category dtypes are maintained + after replace with direct values + """ + + # replace values in input dataframe + input_df = input_category_df.replace("d", "z") + input_df = input_df.replace("obj1", "obj9") + input_df = input_df.replace("cat2", "catX") + + tm.assert_frame_equal(input_df, expected_category_df) + + @pytest.mark.xfail( + reason="currently, category dtype gets changed to object type after replace, see #23305", strict=True + ) + def test_replace_dict_category_type(self, input_category_df, expected_category_df): + """ + Test to ensure category dtypes are maintained + after replace with dict values + """ + + # replace values in input dataframe + input_df = input_category_df.replace({"d": "z", "obj1": "obj9", "cat2": "catX"}) + + tm.assert_frame_equal(input_df, expected_category_df) \ No newline at end of file diff --git a/pandas/tests/test_replace.py b/pandas/tests/test_replace.py deleted file mode 100644 index 6e7db0a5a2e43..0000000000000 --- a/pandas/tests/test_replace.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest - -import pandas as pd -import pandas._testing as tm - - -@pytest.fixture -def input_dataframe(): - """Create the input dataframe""" - - # create input data - input_dict = { - "col1": [1, 2, 3, 4], - "col2": ["a", "b", "c", "d"], - "col3": [1.5, 2.5, 3.5, 4.5], - "col4": ["cat1", "cat2", "cat3", "cat4"], - "col5": ["obj1", "obj2", "obj3", "obj4"], - } - - # explicitly cast columns as category and order them - input_df = pd.DataFrame(data=input_dict).astype( - {"col2": "category", "col4": "category"} - ) - input_df["col2"] = input_df["col2"].cat.reorder_categories( - ["a", "b", "c", "d"], ordered=True - ) - input_df["col4"] = input_df["col4"].cat.reorder_categories( - ["cat1", "cat2", "cat3", "cat4"], ordered=True - ) - - return input_df - - -@pytest.fixture -def expected_dataframe(): - """create the expected dataframe""" - - # create expected dataframe - expected_dict = { - "col1": [1, 2, 3, 4], - "col2": ["a", "b", "c", "z"], - "col3": [1.5, 2.5, 3.5, 4.5], - "col4": ["cat1", "catX", "cat3", "cat4"], - "col5": ["obj9", "obj2", "obj3", "obj4"], - } - - # explicitly cast columns as category and order them - expected_df = pd.DataFrame(data=expected_dict).astype( - {"col2": "category", "col4": "category"} - ) - expected_df["col2"] = expected_df["col2"].cat.reorder_categories( - ["a", "b", "c", "z"], ordered=True - ) - expected_df["col4"] = expected_df["col4"].cat.reorder_categories( - ["cat1", "catX", "cat3", "cat4"], ordered=True - ) - - return expected_df - - -def test_replace_values_scalar(input_dataframe, expected_dataframe): - - # replace values in input dataframe - input_df = input_dataframe.replace("d", "z") - input_df = input_df.replace("obj1", "obj9") - input_df = input_df.replace("cat2", "catX") - - tm.assert_frame_equal(input_df, expected_dataframe) From 4ef1a534ec54e065249f1ff409c417c7415211d7 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 11 Jul 2020 18:20:40 +0000 Subject: [PATCH 05/10] TST: fix formatting issues #23305 --- pandas/tests/frame/methods/test_replace.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index ab5f5337b42aa..b77573a25d79b 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1481,7 +1481,7 @@ def test_replace_period_ignore_float(self): def test_replace_value_category_type(self, input_category_df, expected_category_df): """ - Test to ensure category dtypes are maintained + Test to ensure category dtypes are maintained after replace with direct values """ @@ -1493,15 +1493,16 @@ def test_replace_value_category_type(self, input_category_df, expected_category_ tm.assert_frame_equal(input_df, expected_category_df) @pytest.mark.xfail( - reason="currently, category dtype gets changed to object type after replace, see #23305", strict=True + reason="currently, category dtype gets changed to object type after replace, see #23305", + strict=True, ) def test_replace_dict_category_type(self, input_category_df, expected_category_df): """ - Test to ensure category dtypes are maintained + Test to ensure category dtypes are maintained after replace with dict values """ # replace values in input dataframe input_df = input_category_df.replace({"d": "z", "obj1": "obj9", "cat2": "catX"}) - tm.assert_frame_equal(input_df, expected_category_df) \ No newline at end of file + tm.assert_frame_equal(input_df, expected_category_df) From 3deef23febc8650cfa2a21af424499becf324ad0 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 11 Jul 2020 18:22:48 +0000 Subject: [PATCH 06/10] TST: shorten long line #23305 --- pandas/tests/frame/methods/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index b77573a25d79b..d7d4b991b44d6 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1493,7 +1493,7 @@ def test_replace_value_category_type(self, input_category_df, expected_category_ tm.assert_frame_equal(input_df, expected_category_df) @pytest.mark.xfail( - reason="currently, category dtype gets changed to object type after replace, see #23305", + reason="category dtype gets changed to object type after replace, see #23305", strict=True, ) def test_replace_dict_category_type(self, input_category_df, expected_category_df): From 0fce9e676f162c2b2707545365893c9c08ab0d26 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 14 Jul 2020 02:29:08 +0000 Subject: [PATCH 07/10] incorporate review comments --- pandas/tests/frame/methods/test_replace.py | 124 ++++++++++----------- 1 file changed, 59 insertions(+), 65 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index d7d4b991b44d6..f01a1ab00465e 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -21,64 +21,6 @@ def mix_abc() -> Dict[str, List[Union[float, str]]]: return {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} -@pytest.fixture -def input_category_df(): - """ - Create the input dataframe with explicit category column - """ - - # create input data - input_dict = { - "col1": [1, 2, 3, 4], - "col2": ["a", "b", "c", "d"], - "col3": [1.5, 2.5, 3.5, 4.5], - "col4": ["cat1", "cat2", "cat3", "cat4"], - "col5": ["obj1", "obj2", "obj3", "obj4"], - } - - # explicitly cast columns as category and order them - input_df = pd.DataFrame(data=input_dict).astype( - {"col2": "category", "col4": "category"} - ) - input_df["col2"] = input_df["col2"].cat.reorder_categories( - ["a", "b", "c", "d"], ordered=True - ) - input_df["col4"] = input_df["col4"].cat.reorder_categories( - ["cat1", "cat2", "cat3", "cat4"], ordered=True - ) - - return input_df - - -@pytest.fixture -def expected_category_df(): - """ - Create the expected dataframe with explicit category column - """ - - # create expected dataframe - expected_dict = { - "col1": [1, 2, 3, 4], - "col2": ["a", "b", "c", "z"], - "col3": [1.5, 2.5, 3.5, 4.5], - "col4": ["cat1", "catX", "cat3", "cat4"], - "col5": ["obj9", "obj2", "obj3", "obj4"], - } - - # explicitly cast columns as category and order them - expected_df = pd.DataFrame(data=expected_dict).astype( - {"col2": "category", "col4": "category"} - ) - expected_df["col2"] = expected_df["col2"].cat.reorder_categories( - ["a", "b", "c", "z"], ordered=True - ) - expected_df["col4"] = expected_df["col4"].cat.reorder_categories( - ["cat1", "catX", "cat3", "cat4"], ordered=True - ) - - return expected_df - - class TestDataFrameReplace: def test_replace_inplace(self, datetime_frame, float_string_frame): datetime_frame["A"][:5] = np.nan @@ -1479,21 +1421,59 @@ def test_replace_period_ignore_float(self): expected = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) tm.assert_frame_equal(expected, result) - def test_replace_value_category_type(self, input_category_df, expected_category_df): + def test_replace_value_category_type(self): """ Test to ensure category dtypes are maintained after replace with direct values """ + # create input data + input_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "d"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "cat2", "cat3", "cat4"], + "col5": ["obj1", "obj2", "obj3", "obj4"], + } + # explicitly cast columns as category and order them + input_df = pd.DataFrame(data=input_dict).astype( + {"col2": "category", "col4": "category"} + ) + input_df["col2"] = input_df["col2"].cat.reorder_categories( + ["a", "b", "c", "d"], ordered=True + ) + input_df["col4"] = input_df["col4"].cat.reorder_categories( + ["cat1", "cat2", "cat3", "cat4"], ordered=True + ) + + # create expected dataframe + expected_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "z"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "catX", "cat3", "cat4"], + "col5": ["obj9", "obj2", "obj3", "obj4"], + } + # explicitly cast columns as category and order them + expected_df = pd.DataFrame(data=expected_dict).astype( + {"col2": "category", "col4": "category"} + ) + expected_df["col2"] = expected_df["col2"].cat.reorder_categories( + ["a", "b", "c", "z"], ordered=True + ) + expected_df["col4"] = expected_df["col4"].cat.reorder_categories( + ["cat1", "catX", "cat3", "cat4"], ordered=True + ) + # replace values in input dataframe - input_df = input_category_df.replace("d", "z") + input_df = input_df.replace("d", "z") input_df = input_df.replace("obj1", "obj9") input_df = input_df.replace("cat2", "catX") - tm.assert_frame_equal(input_df, expected_category_df) + tm.assert_frame_equal(input_df, expected_df) @pytest.mark.xfail( - reason="category dtype gets changed to object type after replace, see #23305", + reason="category dtype gets changed to object type after replace, see #35268", strict=True, ) def test_replace_dict_category_type(self, input_category_df, expected_category_df): @@ -1502,7 +1482,21 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d after replace with dict values """ - # replace values in input dataframe - input_df = input_category_df.replace({"d": "z", "obj1": "obj9", "cat2": "catX"}) + # create input dataframe + input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]} + # explicitly cast columns as category + input_df = pd.DataFrame(data=input_dict).astype( + {"col1": "category", "col2": "category", "col3": "category"} + ) + + # create expected dataframe + expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]} + # explicitly cast columns as category + expected_df = pd.DataFrame(data=expected_dict).astype( + {"col1": "category", "col2": "category", "col3": "category"} + ) + + # replace values in input dataframe using a dict + input_df = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) - tm.assert_frame_equal(input_df, expected_category_df) + tm.assert_frame_equal(input_df, expected_df) \ No newline at end of file From d440c5204c7b6a9fd3dc93dec621c6a2acb8da4d Mon Sep 17 00:00:00 2001 From: root Date: Tue, 14 Jul 2020 02:32:11 +0000 Subject: [PATCH 08/10] add missing new line at the eof --- pandas/tests/frame/methods/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f01a1ab00465e..533b442f489e7 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1499,4 +1499,4 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d # replace values in input dataframe using a dict input_df = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) - tm.assert_frame_equal(input_df, expected_df) \ No newline at end of file + tm.assert_frame_equal(input_df, expected_df) From 55698945c5e37b0ccc9af719b2638f29c25045d5 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 14 Jul 2020 03:48:10 +0000 Subject: [PATCH 09/10] empty commit to trigger travis rebuild From e22a63206e4f379f35e0923f6a736ad9ade1809d Mon Sep 17 00:00:00 2001 From: root Date: Thu, 16 Jul 2020 03:05:00 +0000 Subject: [PATCH 10/10] rename result/expected vars and add issue num as comment --- pandas/tests/frame/methods/test_replace.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 533b442f489e7..a295b1f8baf63 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1423,7 +1423,7 @@ def test_replace_period_ignore_float(self): def test_replace_value_category_type(self): """ - Test to ensure category dtypes are maintained + Test for #23305: to ensure category dtypes are maintained after replace with direct values """ @@ -1455,22 +1455,22 @@ def test_replace_value_category_type(self): "col5": ["obj9", "obj2", "obj3", "obj4"], } # explicitly cast columns as category and order them - expected_df = pd.DataFrame(data=expected_dict).astype( + expected = pd.DataFrame(data=expected_dict).astype( {"col2": "category", "col4": "category"} ) - expected_df["col2"] = expected_df["col2"].cat.reorder_categories( + expected["col2"] = expected["col2"].cat.reorder_categories( ["a", "b", "c", "z"], ordered=True ) - expected_df["col4"] = expected_df["col4"].cat.reorder_categories( + expected["col4"] = expected["col4"].cat.reorder_categories( ["cat1", "catX", "cat3", "cat4"], ordered=True ) # replace values in input dataframe input_df = input_df.replace("d", "z") input_df = input_df.replace("obj1", "obj9") - input_df = input_df.replace("cat2", "catX") + result = input_df.replace("cat2", "catX") - tm.assert_frame_equal(input_df, expected_df) + tm.assert_frame_equal(result, expected) @pytest.mark.xfail( reason="category dtype gets changed to object type after replace, see #35268", @@ -1492,11 +1492,11 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d # create expected dataframe expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]} # explicitly cast columns as category - expected_df = pd.DataFrame(data=expected_dict).astype( + expected = pd.DataFrame(data=expected_dict).astype( {"col1": "category", "col2": "category", "col3": "category"} ) # replace values in input dataframe using a dict - input_df = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) + result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) - tm.assert_frame_equal(input_df, expected_df) + tm.assert_frame_equal(result, expected)