diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index efea1fc1f525f..6a299bc415f49 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -223,6 +223,7 @@ Other enhancements - :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`) - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) - Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`) +- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/conftest.py b/pandas/conftest.py index 0a3bf31cf9666..eb7263fe116cc 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,3 +1,4 @@ +from collections import abc from datetime import date, time, timedelta, timezone from decimal import Decimal import operator @@ -894,3 +895,38 @@ def index_or_series(request): See GH#29725 """ return request.param + + +@pytest.fixture +def dict_subclass(): + """ + Fixture for a dictionary subclass. + """ + + class TestSubDict(dict): + def __init__(self, *args, **kwargs): + dict.__init__(self, *args, **kwargs) + + return TestSubDict + + +@pytest.fixture +def non_mapping_dict_subclass(): + """ + Fixture for a collections.abc.Mapping subclass that is not a dict subclass. 
+ """ + + class TestNonDictMapping(abc.Mapping): + def __init__(self, underlying_dict): + self._data = underlying_dict + + def __getitem__(self, key): + return self._data.__getitem__(key) + + def __iter__(self): + return self._data.__iter__() + + def __len__(self): + return self._data.__len__() + + return TestNonDictMapping diff --git a/pandas/core/base.py b/pandas/core/base.py index 064a51bf0ce74..ef7e59c9e19d7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -19,6 +19,7 @@ is_categorical_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, + is_dict_like, is_extension_array_dtype, is_list_like, is_object_dtype, @@ -1107,8 +1108,8 @@ def _map_values(self, mapper, na_action=None): # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield # python types - if isinstance(mapper, dict): - if hasattr(mapper, "__missing__"): + if is_dict_like(mapper): + if isinstance(mapper, dict) and hasattr(mapper, "__missing__"): # If a dictionary subclass defines a default value method, # convert mapper to a lookup function (GH #15999). dict_with_default = mapper diff --git a/pandas/core/series.py b/pandas/core/series.py index aa5af9bb893fa..8936420053607 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -182,6 +182,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): def __init__( self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False ): + # we are called internally, so short-circuit if fastpath: @@ -250,7 +251,7 @@ def __init__( else: data = data.reindex(index, copy=copy) data = data._data - elif isinstance(data, dict): + elif is_dict_like(data): data, index = self._init_dict(data, index, dtype) dtype = None copy = False @@ -3513,7 +3514,7 @@ def map(self, arg, na_action=None): Parameters ---------- - arg : function, dict, or Series + arg : function, collections.abc.Mapping subclass or Series Mapping correspondence. 
na_action : {None, 'ignore'}, default None If 'ignore', propagate NaN values, without passing them to the diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ffdf1435f74e0..4e7d8c3054cf2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -511,17 +511,17 @@ def test_constructor_with_embedded_frames(self): result = df2.loc[1, 0] tm.assert_frame_equal(result, df1 + 10) - def test_constructor_subclass_dict(self, float_frame): + def test_constructor_subclass_dict(self, float_frame, dict_subclass): # Test for passing dict subclass to constructor data = { - "col1": tm.TestSubDict((x, 10.0 * x) for x in range(10)), - "col2": tm.TestSubDict((x, 20.0 * x) for x in range(10)), + "col1": dict_subclass((x, 10.0 * x) for x in range(10)), + "col2": dict_subclass((x, 20.0 * x) for x in range(10)), } df = DataFrame(data) refdf = DataFrame({col: dict(val.items()) for col, val in data.items()}) tm.assert_frame_equal(refdf, df) - data = tm.TestSubDict(data.items()) + data = dict_subclass(data.items()) df = DataFrame(data) tm.assert_frame_equal(refdf, df) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index a187a1362297c..89a60d371770a 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -126,8 +126,8 @@ def test_constructor_dict(self): expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) tm.assert_series_equal(result, expected) - def test_constructor_subclass_dict(self): - data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) + def test_constructor_subclass_dict(self, dict_subclass): + data = dict_subclass((x, 10.0 * x) for x in range(10)) series = Series(data) expected = Series(dict(data.items())) tm.assert_series_equal(series, expected) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 30b8b5c7c8545..37bedc1ab7508 100644 --- a/pandas/tests/series/test_apply.py +++ 
b/pandas/tests/series/test_apply.py @@ -627,6 +627,30 @@ class DictWithoutMissing(dict): expected = Series([np.nan, np.nan, "three"]) tm.assert_series_equal(result, expected) + def test_map_abc_mapping(self, non_mapping_dict_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + s = Series([1, 2, 3]) + not_a_dictionary = non_mapping_dict_subclass({3: "three"}) + result = s.map(not_a_dictionary) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + def test_map_abc_mapping_with_missing(self, non_mapping_dict_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + class NonDictMappingWithMissing(non_mapping_dict_subclass): + def __missing__(self, key): + return "missing" + + s = Series([1, 2, 3]) + not_a_dictionary = NonDictMappingWithMissing({3: "three"}) + result = s.map(not_a_dictionary) + # __missing__ is a dict concept, not a Mapping concept, + # so it should not change the result! 
+ expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + def test_map_box(self): vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] s = pd.Series(vals) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 20a83ec4cd162..1c3f1404215d3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1089,6 +1089,14 @@ def create_data(constructor): tm.assert_series_equal(result_datetime, expected) tm.assert_series_equal(result_Timestamp, expected) + def test_constructor_mapping(self, non_mapping_dict_subclass): + # GH 29788 + ndm = non_mapping_dict_subclass({3: "three"}) + result = Series(ndm) + expected = Series(["three"], index=[3]) + + tm.assert_series_equal(result, expected) + def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] s = Series(data) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c31cddc102afb..2e201339d4d77 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2123,11 +2123,6 @@ def makeMissingDataframe(density=0.9, random_state=None): return df -class TestSubDict(dict): - def __init__(self, *args, **kwargs): - dict.__init__(self, *args, **kwargs) - - def optional_args(decorator): """allows a decorator to take optional positional and keyword arguments. Assumes that taking a single, callable, positional argument means that