From b59d6b86f3b971d53bc3ec483ddb31b2b4dc559a Mon Sep 17 00:00:00 2001 From: Michael Hsieh Date: Sat, 29 May 2021 10:10:04 -0700 Subject: [PATCH 1/6] TST: Check map function works with StringDType (#40823). --- pandas/tests/frame/methods/test_map.py | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 pandas/tests/frame/methods/test_map.py diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py new file mode 100644 index 0000000000000..28fca42a506e0 --- /dev/null +++ b/pandas/tests/frame/methods/test_map.py @@ -0,0 +1,33 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestMap: + def test_map(self): + # map test on StringDType, GH#40823 + df1 = pd.DataFrame( + { + "col1": [pd.NA, "foo", "bar"] + }, index=["id1", "id2", "id3"], dtype=pd.StringDtype() + ) + + df2 = pd.DataFrame( + { + "id": ["id4", "id2", "id1"] + }, dtype=pd.StringDtype() + ) + + df2["col1"] = df2["id"].map(df1["col1"]) + + result = df2 + expected = pd.DataFrame( + { + "id": ["id4", "id2", "id1"], + "col1": [pd.NA, "foo", pd.NA] + }, dtype=pd.StringDtype() + ) + + tm.assert_frame_equal(result, expected) \ No newline at end of file From 590396d6984f0b4d0794ddade636d60ab2521953 Mon Sep 17 00:00:00 2001 From: Michael Hsieh Date: Sat, 29 May 2021 10:33:20 -0700 Subject: [PATCH 2/6] TST: Remove unused imports. --- pandas/tests/frame/methods/test_map.py | 29 +++++++++----------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py index 28fca42a506e0..b453410594466 100644 --- a/pandas/tests/frame/methods/test_map.py +++ b/pandas/tests/frame/methods/test_map.py @@ -1,6 +1,3 @@ -import numpy as np -import pytest - import pandas as pd import pandas._testing as tm @@ -9,25 +6,19 @@ class TestMap: def test_map(self): # map test on StringDType, GH#40823 df1 = pd.DataFrame( - { - "col1": [pd.NA, "foo", "bar"] - }, index=["id1", "id2", "id3"], dtype=pd.StringDtype() + {"col1": [pd.NA, "foo", "bar"]}, + index=["id1", "id2", "id3"], + dtype=pd.StringDtype(), ) - df2 = pd.DataFrame( - { - "id": ["id4", "id2", "id1"] - }, dtype=pd.StringDtype() - ) - + df2 = pd.DataFrame({"id": ["id4", "id2", "id1"]}, dtype=pd.StringDtype()) + df2["col1"] = df2["id"].map(df1["col1"]) - + result = df2 expected = pd.DataFrame( - { - "id": ["id4", "id2", "id1"], - "col1": [pd.NA, "foo", pd.NA] - }, dtype=pd.StringDtype() + {"id": ["id4", "id2", "id1"], "col1": [pd.NA, "foo", pd.NA]}, + dtype=pd.StringDtype(), ) - - tm.assert_frame_equal(result, expected) \ No newline at end of file + + tm.assert_frame_equal(result, expected) From c707f1bb018a0e9e4c71adbf885e4ccc8b4986a2 Mon Sep 17 00:00:00 2001 From: Michael Hsieh Date: Tue, 1 Jun 2021 15:19:30 -0700 Subject: [PATCH 3/6] TST: Remove new test file since want to add test in existing file. --- pandas/tests/frame/methods/test_map.py | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 pandas/tests/frame/methods/test_map.py diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py deleted file mode 100644 index b453410594466..0000000000000 --- a/pandas/tests/frame/methods/test_map.py +++ /dev/null @@ -1,24 +0,0 @@ -import pandas as pd -import pandas._testing as tm - - -class TestMap: - def test_map(self): - # map test on StringDType, GH#40823 - df1 = pd.DataFrame( - {"col1": [pd.NA, "foo", "bar"]}, - index=["id1", "id2", "id3"], - dtype=pd.StringDtype(), - ) - - df2 = pd.DataFrame({"id": ["id4", "id2", "id1"]}, dtype=pd.StringDtype()) - - df2["col1"] = df2["id"].map(df1["col1"]) - - result = df2 - expected = pd.DataFrame( - {"id": ["id4", "id2", "id1"], "col1": [pd.NA, "foo", pd.NA]}, - dtype=pd.StringDtype(), - ) - - tm.assert_frame_equal(result, expected) From d8347c82af8fe62ab018a3cd4888f770e597492f Mon Sep 17 00:00:00 2001 From: Michael Hsieh Date: Tue, 1 Jun 2021 15:23:55 -0700 Subject: [PATCH 4/6] TST: Add new test to pandas/tests/apply/test_series_apply.py --- pandas/tests/apply/test_series_apply.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 88c3ad228f8c3..34943768e8d49 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -115,6 +115,27 @@ def func(x): ser.apply(func) +def test_series_map_stringdtype(): + # map test on StringDType, GH#40823 + df1 = DataFrame( + {"col1": [pd.NA, "foo", "bar"]}, + index=["id1", "id2", "id3"], + dtype=pd.StringDtype(), + ) + + df2 = DataFrame({"id": ["id4", "id2", "id1"]}, dtype=pd.StringDtype()) + + df2["col1"] = df2["id"].map(df1["col1"]) + + result = df2 + expected = DataFrame( + {"id": ["id4", "id2", "id1"], "col1": [pd.NA, "foo", pd.NA]}, + dtype=pd.StringDtype(), + ) + + tm.assert_frame_equal(result, expected) + + def test_apply_box(): # ufunc will not be boxed. Same test cases as the test_map_box vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] From bf858404206a4c75a15a24dadda5f3e872b847e8 Mon Sep 17 00:00:00 2001 From: Michael Hsieh Date: Tue, 1 Jun 2021 16:26:43 -0700 Subject: [PATCH 5/6] TST: Add updated test on Series (#40823). Checking map function on two series of data type StringDType. Series data is arbitrary. --- pandas/tests/apply/test_series_apply.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 34943768e8d49..12af553e8bd8b 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -117,23 +117,16 @@ def func(x): def test_series_map_stringdtype(): # map test on StringDType, GH#40823 - df1 = DataFrame( - {"col1": [pd.NA, "foo", "bar"]}, + ser1 = Series( + data=["cat", "dog", "rabbit"], index=["id1", "id2", "id3"], dtype=pd.StringDtype(), ) + ser2 = Series(data=["id3", "id2", "id1", "id7000"], dtype=pd.StringDtype()) + result = ser2.map(ser1) + expected = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=pd.StringDtype()) - df2 = DataFrame({"id": ["id4", "id2", "id1"]}, dtype=pd.StringDtype()) - - df2["col1"] = df2["id"].map(df1["col1"]) - - result = df2 - expected = DataFrame( - {"id": ["id4", "id2", "id1"], "col1": [pd.NA, "foo", pd.NA]}, - dtype=pd.StringDtype(), - ) - - tm.assert_frame_equal(result, expected) + tm.assert_series_equal(result, expected) def test_apply_box(): From 078b1b64f9cf9872185abf0be3a9f568219db8bf Mon Sep 17 00:00:00 2001 From: Michael Hsieh Date: Tue, 1 Jun 2021 16:36:55 -0700 Subject: [PATCH 6/6] TST: Change StringDType to any string data type. Using suggested test fixture any_string_dtype. --- pandas/tests/apply/test_series_apply.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 12af553e8bd8b..6837bdb1a6b9c 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -115,16 +115,16 @@ def func(x): ser.apply(func) -def test_series_map_stringdtype(): +def test_series_map_stringdtype(any_string_dtype): # map test on StringDType, GH#40823 ser1 = Series( data=["cat", "dog", "rabbit"], index=["id1", "id2", "id3"], - dtype=pd.StringDtype(), + dtype=any_string_dtype, ) - ser2 = Series(data=["id3", "id2", "id1", "id7000"], dtype=pd.StringDtype()) + ser2 = Series(data=["id3", "id2", "id1", "id7000"], dtype=any_string_dtype) result = ser2.map(ser1) - expected = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=pd.StringDtype()) + expected = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=any_string_dtype) tm.assert_series_equal(result, expected)