pandas-dev · jreback · Nov 25, 2017 · Oct 27, 2016 · Oct 27, 2016 · Jan 7, 2017
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -123,6 +123,30 @@ def time_series_dropna_datetime(self):
         self.s.dropna()
 
 
+class series_map_dict(object):
+    goal_time = 0.2
+
+    def setup(self):
+        map_size = 1000
+        self.s = Series(np.random.randint(0, map_size, 10000))
+        self.map_dict = {i: map_size - i for i in range(map_size)}
+
+    def time_series_map_dict(self):
+        self.s.map(self.map_dict)
+
+
+class series_map_series(object):
+    goal_time = 0.2
+
+    def setup(self):
+        map_size = 1000
+        self.s = Series(np.random.randint(0, map_size, 10000))
+        self.map_series = Series(map_size - np.arange(map_size))
+
+    def time_series_map_series(self):
+        self.s.map(self.map_series)
+
+
 class series_clip(object):
     goal_time = 0.2
 

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -48,6 +48,7 @@ Other API Changes
 - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
 - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`)
 - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
+- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`).
 
 
 .. _whatsnew_0220.deprecations:
@@ -77,7 +78,7 @@ Performance Improvements
 - Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`)
 - :class`DateOffset` arithmetic performance is improved (:issue:`18218`)
 - Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`)
--
+- Improved performance of ``.map()`` with a ``Series`` or ``dict`` input (:issue:`15081`)
 
 .. _whatsnew_0220.docs:
 

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -12,11 +12,12 @@
     is_object_dtype,
     is_list_like,
     is_scalar,
-    is_datetimelike)
+    is_datetimelike,
+    is_extension_type)
 
 from pandas.util._validators import validate_bool_kwarg
 
-from pandas.core import common as com
+from pandas.core import common as com, algorithms
 import pandas.core.nanops as nanops
 import pandas._libs.lib as lib
 from pandas.compat.numpy import function as nv
@@ -838,6 +839,75 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                             klass=self.__class__.__name__, op=name))
         return func(**kwds)
 
+    def _map_values(self, arg, na_action=None):
+        """An internal function that maps values using the input
+        correspondence (which can be a dict, Series, or function).
+
+        Parameters
+        ----------
+        arg : function, dict, or Series
+            The input correspondence object
+        na_action : {None, 'ignore'}
+            If 'ignore', propagate NA values, without passing them to the
+            mapping function
+
+        Returns
+        -------
+        new_values : array-like
+            The mapped values, returned unwrapped: looked up in ``arg``
+            when it is a dict/Series, otherwise the result of calling
+            ``arg`` on the values. Callers wrap them (Index, Series, ...).
+
+        """
+
+        # we can fastpath dict/Series to an efficient map
+        # as we know that we are not going to have to yield
+        # python types
+        if isinstance(arg, dict):
+            if hasattr(arg, '__missing__'):
+                # If a dictionary subclass defines a default value method,
+                # convert arg to a lookup function (GH #15999).
+                dict_with_default = arg
+                arg = lambda x: dict_with_default[x]
+            else:
+                # Dictionary does not have a default. Thus it's safe to
+                # convert to a Series for efficiency.
+                from pandas import Series
+                arg = Series(arg, index=arg.keys())
+
+        if isinstance(arg, ABCSeries):
+            # arg is a Series (given directly or built from a dict above);
+            # look up each value in arg.index and take the matching entries
+            if is_extension_type(self.dtype):
+                values = self._values
+            else:
+                values = self.values
+
+            indexer = arg.index.get_indexer(values)
+            new_values = algorithms.take_1d(arg._values, indexer)
+            return new_values
+
+        # we must convert to python types
+        if is_extension_type(self.dtype):
+            values = self._values
+            if na_action is not None:
+                raise NotImplementedError
+            map_f = lambda values, f: values.map(f)
+        else:
+            values = self.astype(object)
+            values = getattr(values, 'values', values)
+            if na_action == 'ignore':
+                def map_f(values, f):
+                    return lib.map_infer_mask(values, f,
+                                              isna(values).view(np.uint8))
+            else:
+                map_f = lib.map_infer
+
+        # arg is a function
+        new_values = map_f(values, arg)
+
+        return new_values
+
     def value_counts(self, normalize=False, sort=True, ascending=False,
                      bins=None, dropna=True):
         """

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -13,7 +13,6 @@
 from pandas.compat.numpy import function as nv
 from pandas import compat
 
-
 from pandas.core.dtypes.generic import (
     ABCSeries,
     ABCMultiIndex,
@@ -2821,6 +2820,27 @@ def get_indexer_for(self, target, **kwargs):
         indexer, _ = self.get_indexer_non_unique(target, **kwargs)
         return indexer
 
+    _index_shared_docs['_get_values_from_dict'] = """
+        Return the values of the input dictionary in the order the keys are
+        in the index. np.nan is returned for index values not in the
+        dictionary.
+
+        Parameters
+        ----------
+        data : dict
+            The dictionary from which to extract the values
+
+        Returns
+        -------
+        np.array
+
+        """
+
+    @Appender(_index_shared_docs['_get_values_from_dict'])
+    def _get_values_from_dict(self, data):
+        return lib.fast_multiget(data, self.values,
+                                 default=np.nan)
+
     def _maybe_promote(self, other):
         # A hack, but it works
         from pandas.core.indexes.datetimes import DatetimeIndex
@@ -2859,13 +2879,16 @@ def groupby(self, values):
 
         return result
 
-    def map(self, mapper):
-        """Apply mapper function to an index.
+    def map(self, arg, na_action=None):
+        """Map values of Index using input correspondence (which can be a
+        dict, Series, or function)
 
         Parameters
         ----------
-        mapper : callable
-            Function to be applied.
+        arg : function, dict, or Series
+        na_action : {None, 'ignore'}
+            If 'ignore', propagate NA values, without passing them to the
+            mapping function
 
         Returns
         -------
@@ -2875,15 +2898,23 @@ def map(self, mapper):
             a MultiIndex will be returned.
 
         """
+
         from .multi import MultiIndex
-        mapped_values = self._arrmap(self.values, mapper)
+        new_values = super(Index, self)._map_values(
+            arg, na_action=na_action)
         attributes = self._get_attributes_dict()
-        if mapped_values.size and isinstance(mapped_values[0], tuple):
-            return MultiIndex.from_tuples(mapped_values,
-                                          names=attributes.get('name'))
+        if new_values.size and isinstance(new_values[0], tuple):
+            if isinstance(self, MultiIndex):
+                names = self.names
+            elif attributes.get('name'):
+                names = [attributes.get('name')] * len(new_values[0])
+            else:
+                names = None
+            return MultiIndex.from_tuples(new_values,
+                                          names=names)
 
         attributes['copy'] = False
-        return Index(mapped_values, **attributes)
+        return Index(new_values, **attributes)
 
     def isin(self, values, level=None):
         """

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -352,7 +352,7 @@ def map(self, f):
 
             # Try to use this result if we can
             if isinstance(result, np.ndarray):
-                self._shallow_copy(result)
+                result = Index(result)
 
             if not isinstance(result, Index):
                 raise TypeError('The map function must return an Index object')
@@ -698,6 +698,14 @@ def __rsub__(self, other):
     def _add_delta(self, other):
         return NotImplemented
 
+    @Appender(_index_shared_docs['_get_values_from_dict'])
+    def _get_values_from_dict(self, data):
+        if len(data):
+            return np.array([data.get(i, np.nan)
+                             for i in self.asobject.values])
+
+        return np.array([np.nan])
+
     def _add_delta_td(self, other):
         # add a delta of a timedeltalike
         # return the i8 result view

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -1451,6 +1451,17 @@ def get_value_maybe_box(self, series, key):
                                         key, tz=self.tz)
         return _maybe_box(self, values, series, key)
 
+    @Appender(_index_shared_docs['_get_values_from_dict'])
+    def _get_values_from_dict(self, data):
+        if len(data):
+            # coerce back to datetime objects for lookup
+            data = com._dict_compat(data)
+            return lib.fast_multiget(data,
+                                     self.asobject.values,
+                                     default=np.nan)
+
+        return np.array([np.nan])
+
     def get_loc(self, key, method=None, tolerance=None):
         """
         Get integer location for requested label

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -44,7 +44,6 @@
                                 _maybe_match_name,
                                 SettingWithCopyError,
                                 _maybe_box_datetimelike,
-                                _dict_compat,
                                 standardize_mapping,
                                 _any_none)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
@@ -202,23 +201,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
                         index = Index(data)
                     else:
                         index = Index(_try_sort(data))
+
                 try:
-                    if isinstance(index, DatetimeIndex):
-                        if len(data):
-                            # coerce back to datetime objects for lookup
-                            data = _dict_compat(data)
-                            data = lib.fast_multiget(data,
-                                                     index.asobject.values,
-                                                     default=np.nan)
-                        else:
-                            data = np.nan
-                    # GH #12169
-                    elif isinstance(index, (PeriodIndex, TimedeltaIndex)):
-                        data = ([data.get(i, np.nan) for i in index]
-                                if data else np.nan)
-                    else:
-                        data = lib.fast_multiget(data, index.values,
-                                                 default=np.nan)
+                    data = index._get_values_from_dict(data)
                 except TypeError:
                     data = ([data.get(i, np.nan) for i in index]
                             if data else np.nan)
@@ -2337,41 +2322,8 @@ def map(self, arg, na_action=None):
         3    0
         dtype: int64
         """
-
-        if is_extension_type(self.dtype):
-            values = self._values
-            if na_action is not None:
-                raise NotImplementedError
-            map_f = lambda values, f: values.map(f)
-        else:
-            values = self.asobject
-
-            if na_action == 'ignore':
-                def map_f(values, f):
-                    return lib.map_infer_mask(values, f,
-                                              isna(values).view(np.uint8))
-            else:
-                map_f = lib.map_infer
-
-        if isinstance(arg, dict):
-            if hasattr(arg, '__missing__'):
-                # If a dictionary subclass defines a default value method,
-                # convert arg to a lookup function (GH #15999).
-                dict_with_default = arg
-                arg = lambda x: dict_with_default[x]
-            else:
-                # Dictionary does not have a default. Thus it's safe to
-                # convert to an indexed series for efficiency.
-                arg = self._constructor(arg, index=arg.keys())
-
-        if isinstance(arg, Series):
-            # arg is a Series
-            indexer = arg.index.get_indexer(values)
-            new_values = algorithms.take_1d(arg._values, indexer)
-        else:
-            # arg is a function
-            new_values = map_f(values, arg)
-
+        new_values = super(Series, self)._map_values(
+            arg, na_action=na_action)
         return self._constructor(new_values,
                                  index=self.index).__finalize__(self)
 

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -996,3 +996,29 @@ def test_searchsorted_monotonic(self, indices):
             # non-monotonic should raise.
             with pytest.raises(ValueError):
                 indices._searchsorted_monotonic(value, side='left')
+
+    def test_map(self):
+        index = self.create_index()
+        # From output of UInt64Index mapping can't infer that we
+        #   shouldn't default to Int64
+        if isinstance(index, UInt64Index):
+            expected = Index(index.values.tolist())
+        else:
+            expected = index
+
+        tm.assert_index_equal(index.map(lambda x: x), expected)
+
+        identity_dict = {x: x for x in index}
+        tm.assert_index_equal(index.map(identity_dict), expected)
+
+        # Use values to work around MultiIndex instantiation of series
+        identity_series = Series(expected.values, index=index)
+        tm.assert_index_equal(index.map(identity_series), expected)
+
+        # empty mappable
+        nan_index = pd.Index([np.nan] * len(index))
+        series_map = pd.Series()
+        tm.assert_index_equal(index.map(series_map), nan_index)
+
+        dict_map = {}
+        tm.assert_index_equal(index.map(dict_map), nan_index)