Skip to content

ENH: Allow map with abc mapping #29788

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jan 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ Other enhancements
- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)
- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)

Build Changes
^^^^^^^^^^^^^
Expand Down
36 changes: 36 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import abc
from datetime import date, time, timedelta, timezone
from decimal import Decimal
import operator
Expand Down Expand Up @@ -894,3 +895,38 @@ def index_or_series(request):
See GH#29725
"""
return request.param


@pytest.fixture
def dict_subclass():
    """
    Fixture for a dictionary subclass.

    Returns the class itself rather than an instance, so each test builds
    its own mapping from whatever arguments ``dict`` accepts.
    """

    class TestSubDict(dict):
        def __init__(self, *args, **kwargs):
            # Forward every argument unchanged to ``dict``.
            super().__init__(*args, **kwargs)

    return TestSubDict


@pytest.fixture
def non_mapping_dict_subclass():
    """
    Fixture for a ``collections.abc.Mapping`` subclass that is not a ``dict``.

    Returns the class itself rather than an instance. The class wraps the
    mapping passed to its constructor and delegates item lookup, iteration
    and length to it.
    """

    class TestNonDictMapping(abc.Mapping):
        def __init__(self, underlying_dict):
            # Keep a reference to the wrapped mapping; no copy is made.
            self._data = underlying_dict

        def __getitem__(self, key):
            return self._data[key]

        def __iter__(self):
            return iter(self._data)

        def __len__(self):
            return len(self._data)

    return TestNonDictMapping
5 changes: 3 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
is_categorical_dtype,
is_datetime64_ns_dtype,
is_datetime64tz_dtype,
is_dict_like,
is_extension_array_dtype,
is_list_like,
is_object_dtype,
Expand Down Expand Up @@ -1107,8 +1108,8 @@ def _map_values(self, mapper, na_action=None):
# we can fastpath dict/Series to an efficient map
# as we know that we are not going to have to yield
# python types
if isinstance(mapper, dict):
if hasattr(mapper, "__missing__"):
if is_dict_like(mapper):
if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
# If a dictionary subclass defines a default value method,
# convert mapper to a lookup function (GH #15999).
dict_with_default = mapper
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
def __init__(
self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
):

# we are called internally, so short-circuit
if fastpath:

Expand Down Expand Up @@ -250,7 +251,7 @@ def __init__(
else:
data = data.reindex(index, copy=copy)
data = data._data
elif isinstance(data, dict):
elif is_dict_like(data):
data, index = self._init_dict(data, index, dtype)
dtype = None
copy = False
Expand Down Expand Up @@ -3513,7 +3514,7 @@ def map(self, arg, na_action=None):

Parameters
----------
arg : function, dict, or Series
arg : function, collections.abc.Mapping subclass or Series
Mapping correspondence.
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NaN values, without passing them to the
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,17 +511,17 @@ def test_constructor_with_embedded_frames(self):
result = df2.loc[1, 0]
tm.assert_frame_equal(result, df1 + 10)

def test_constructor_subclass_dict(self, float_frame):
def test_constructor_subclass_dict(self, float_frame, dict_subclass):
# Test for passing dict subclass to constructor
data = {
"col1": tm.TestSubDict((x, 10.0 * x) for x in range(10)),
"col2": tm.TestSubDict((x, 20.0 * x) for x in range(10)),
"col1": dict_subclass((x, 10.0 * x) for x in range(10)),
"col2": dict_subclass((x, 20.0 * x) for x in range(10)),
}
df = DataFrame(data)
refdf = DataFrame({col: dict(val.items()) for col, val in data.items()})
tm.assert_frame_equal(refdf, df)

data = tm.TestSubDict(data.items())
data = dict_subclass(data.items())
df = DataFrame(data)
tm.assert_frame_equal(refdf, df)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ def test_constructor_dict(self):
expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
tm.assert_series_equal(result, expected)

def test_constructor_subclass_dict(self):
data = tm.TestSubDict((x, 10.0 * x) for x in range(10))
def test_constructor_subclass_dict(self, dict_subclass):
data = dict_subclass((x, 10.0 * x) for x in range(10))
series = Series(data)
expected = Series(dict(data.items()))
tm.assert_series_equal(series, expected)
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/series/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,30 @@ class DictWithoutMissing(dict):
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)

def test_map_abc_mapping(self, non_mapping_dict_subclass):
    # https://github.com/pandas-dev/pandas/issues/29733
    # Check collections.abc.Mapping support as mapper for Series.map
    s = Series([1, 2, 3])
    not_a_dictionary = non_mapping_dict_subclass({3: "three"})
    result = s.map(not_a_dictionary)
    # Only the key 3 is present in the mapper; 1 and 2 are expected
    # to come back as NaN.
    expected = Series([np.nan, np.nan, "three"])
    tm.assert_series_equal(result, expected)

def test_map_abc_mapping_with_missing(self, non_mapping_dict_subclass):
    # https://github.com/pandas-dev/pandas/issues/29733
    # Check that a __missing__ method defined on a non-dict
    # collections.abc.Mapping is ignored by Series.map
    class NonDictMappingWithMissing(non_mapping_dict_subclass):
        def __missing__(self, key):
            return "missing"

    s = Series([1, 2, 3])
    not_a_dictionary = NonDictMappingWithMissing({3: "three"})
    result = s.map(not_a_dictionary)
    # __missing__ is a dict concept, not a Mapping concept,
    # so it should not change the result!
    expected = Series([np.nan, np.nan, "three"])
    tm.assert_series_equal(result, expected)

def test_map_box(self):
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
s = pd.Series(vals)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,6 +1089,14 @@ def create_data(constructor):
tm.assert_series_equal(result_datetime, expected)
tm.assert_series_equal(result_Timestamp, expected)

def test_constructor_mapping(self, non_mapping_dict_subclass):
    # GH 29788
    # A collections.abc.Mapping that is not a dict is passed to the
    # Series constructor: keys are expected to become the index and
    # values the data.
    ndm = non_mapping_dict_subclass({3: "three"})
    result = Series(ndm)
    expected = Series(["three"], index=[3])

    tm.assert_series_equal(result, expected)

def test_constructor_list_of_tuples(self):
data = [(1, 1), (2, 2), (2, 3)]
s = Series(data)
Expand Down
5 changes: 0 additions & 5 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2123,11 +2123,6 @@ def makeMissingDataframe(density=0.9, random_state=None):
return df


class TestSubDict(dict):
    """Plain ``dict`` subclass whose constructor forwards to ``dict``."""

    def __init__(self, *args, **kwargs):
        # Forward every argument unchanged to ``dict``.
        super().__init__(*args, **kwargs)


def optional_args(decorator):
"""allows a decorator to take optional positional and keyword arguments.
Assumes that taking a single, callable, positional argument means that
Expand Down