pandas-dev · jreback · Nov 25, 2017 · Oct 27, 2016 · Oct 27, 2016 · Jan 7, 2017
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -123,6 +123,30 @@ def time_series_dropna_datetime(self):
         self.s.dropna()
 
 
+class series_map_dict(object):
+    goal_time = 0.2
+
+    def setup(self):
+        map_size = 1000
+        self.s = Series(np.random.randint(0, map_size, 10000))
+        self.map_dict = {i: map_size - i for i in range(map_size)}
+
+    def time_series_map_dict(self):
+        self.s.map(self.map_dict)
+
+
+class series_map_series(object):
+    goal_time = 0.2
+
+    def setup(self):
+        map_size = 1000
+        self.s = Series(np.random.randint(0, map_size, 10000))
+        self.map_series = Series(map_size - np.arange(map_size))
+
+    def time_series_map_series(self):
+        self.s.map(self.map_series)
+
+
 class series_clip(object):
     goal_time = 0.2
 

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -48,6 +48,7 @@ Other API Changes
 - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
 - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`)
 - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
+- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`).
 
 
 .. _whatsnew_0220.deprecations:
@@ -77,7 +78,7 @@ Performance Improvements
 - Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`)
 - :class`DateOffset` arithmetic performance is improved (:issue:`18218`)
 - Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`)
--
+- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`)
 
 .. _whatsnew_0220.docs:
 

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -12,11 +12,12 @@
     is_object_dtype,
     is_list_like,
     is_scalar,
-    is_datetimelike)
+    is_datetimelike,
+    is_extension_type)
 
 from pandas.util._validators import validate_bool_kwarg
 
-from pandas.core import common as com
+from pandas.core import common as com, algorithms
 import pandas.core.nanops as nanops
 import pandas._libs.lib as lib
 from pandas.compat.numpy import function as nv
@@ -838,6 +839,75 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                             klass=self.__class__.__name__, op=name))
         return func(**kwds)
 
+    def _map_values(self, mapper, na_action=None):
+        """An internal function that maps values using the input
+        correspondence (which can be a dict, Series, or function).
+
+        Parameters
+        ----------
+        mapper : function, dict, or Series
+            The input correspondence object
+        na_action : {None, 'ignore'}
+            If 'ignore', propagate NA values, without passing them to the
+            mapping function
+
+        Returns
+        -------
+        new_values : array-like
+            The mapped values, not yet wrapped in an Index or Series.
+            Callers such as ``Index.map`` and ``Series.map`` construct
+            the final object from this result.
+
+        """
+
+        # we can fastpath dict/Series to an efficient map
+        # as we know that we are not going to have to yield
+        # python types
+        if isinstance(mapper, dict):
+            if hasattr(mapper, '__missing__'):
+                # If a dictionary subclass defines a default value method,
+                # convert mapper to a lookup function (GH #15999).
+                dict_with_default = mapper
+                mapper = lambda x: dict_with_default[x]
+            else:
+                # Dictionary does not have a default. Thus it's safe to
+                # convert to a Series for efficiency.
+                from pandas import Series
+                mapper = Series(mapper, index=mapper.keys())
+
+        if isinstance(mapper, ABCSeries):
+            # mapper is a Series here (passed in directly or built from a
+            # dict above), so values are looked up through its index
+            if is_extension_type(self.dtype):
+                values = self._values
+            else:
+                values = self.values
+
+            indexer = mapper.index.get_indexer(values)
+            new_values = algorithms.take_1d(mapper._values, indexer)
+            return new_values
+
+        # we must convert to python types
+        if is_extension_type(self.dtype):
+            values = self._values
+            if na_action is not None:
+                raise NotImplementedError
+            map_f = lambda values, f: values.map(f)
+        else:
+            values = self.astype(object)
+            values = getattr(values, 'values', values)
+            if na_action == 'ignore':
+                def map_f(values, f):
+                    return lib.map_infer_mask(values, f,
+                                              isna(values).view(np.uint8))
+            else:
+                map_f = lib.map_infer
+
+        # mapper is a function
+        new_values = map_f(values, mapper)
+
+        return new_values
+
     def value_counts(self, normalize=False, sort=True, ascending=False,
                      bins=None, dropna=True):
         """

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -13,7 +13,6 @@
 from pandas.compat.numpy import function as nv
 from pandas import compat
 
-
 from pandas.core.dtypes.generic import (
     ABCSeries,
     ABCMultiIndex,
@@ -2821,6 +2820,27 @@ def get_indexer_for(self, target, **kwargs):
         indexer, _ = self.get_indexer_non_unique(target, **kwargs)
         return indexer
 
+    _index_shared_docs['_get_values_from_dict'] = """
+        Return the values of the input dictionary in the order the keys are
+        in the index. np.nan is returned for index values not in the
+        dictionary.
+
+        Parameters
+        ----------
+        data : dict
+            The dictionary from which to extract the values
+
+        Returns
+        -------
+        np.ndarray
+
+        """
+
+    @Appender(_index_shared_docs['_get_values_from_dict'])
+    def _get_values_from_dict(self, data):
+        return lib.fast_multiget(data, self.values,
+                                 default=np.nan)
+
     def _maybe_promote(self, other):
         # A hack, but it works
         from pandas.core.indexes.datetimes import DatetimeIndex
@@ -2859,13 +2879,16 @@ def groupby(self, values):
 
         return result
 
-    def map(self, mapper):
-        """Apply mapper function to an index.
+    def map(self, mapper, na_action=None):
+        """Map values of Index using input correspondence (which can be a
+        dict, Series, or function)
 
         Parameters
         ----------
-        mapper : callable
-            Function to be applied.
+        mapper : function, dict, or Series
+        na_action : {None, 'ignore'}
+            If 'ignore', propagate NA values, without passing them to the
+            mapping function
 
         Returns
         -------
@@ -2875,15 +2898,23 @@ def map(self, mapper):
             a MultiIndex will be returned.
 
         """
+
         from .multi import MultiIndex
-        mapped_values = self._arrmap(self.values, mapper)
+        new_values = super(Index, self)._map_values(
+            mapper, na_action=na_action)
         attributes = self._get_attributes_dict()
-        if mapped_values.size and isinstance(mapped_values[0], tuple):
-            return MultiIndex.from_tuples(mapped_values,
-                                          names=attributes.get('name'))
+        if new_values.size and isinstance(new_values[0], tuple):
+            if isinstance(self, MultiIndex):
+                names = self.names
+            elif attributes.get('name'):
+                names = [attributes.get('name')] * len(new_values[0])
+            else:
+                names = None
+            return MultiIndex.from_tuples(new_values,
+                                          names=names)
 
         attributes['copy'] = False
-        return Index(mapped_values, **attributes)
+        return Index(new_values, **attributes)
 
     def isin(self, values, level=None):
         """

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -352,7 +352,7 @@ def map(self, f):
 
             # Try to use this result if we can
             if isinstance(result, np.ndarray):
-                self._shallow_copy(result)
+                result = Index(result)
 
             if not isinstance(result, Index):
                 raise TypeError('The map function must return an Index object')
@@ -698,6 +698,14 @@ def __rsub__(self, other):
     def _add_delta(self, other):
         return NotImplemented
 
+    @Appender(_index_shared_docs['_get_values_from_dict'])
+    def _get_values_from_dict(self, data):
+        if len(data):
+            return np.array([data.get(i, np.nan)
+                             for i in self.asobject.values])
+
+        return np.array([np.nan])
+
     def _add_delta_td(self, other):
         # add a delta of a timedeltalike
         # return the i8 result view

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -1451,6 +1451,17 @@ def get_value_maybe_box(self, series, key):
                                         key, tz=self.tz)
         return _maybe_box(self, values, series, key)
 
+    @Appender(_index_shared_docs['_get_values_from_dict'])
+    def _get_values_from_dict(self, data):
+        if len(data):
+            # coerce back to datetime objects for lookup
+            data = com._dict_compat(data)
+            return lib.fast_multiget(data,
+                                     self.asobject.values,
+                                     default=np.nan)
+
+        return np.array([np.nan])
+
     def get_loc(self, key, method=None, tolerance=None):
         """
         Get integer location for requested label

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -44,7 +44,6 @@
                                 _maybe_match_name,
                                 SettingWithCopyError,
                                 _maybe_box_datetimelike,
-                                _dict_compat,
                                 standardize_mapping,
                                 _any_none)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
@@ -202,23 +201,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
                         index = Index(data)
                     else:
                         index = Index(_try_sort(data))
+
                 try:
-                    if isinstance(index, DatetimeIndex):
-                        if len(data):
-                            # coerce back to datetime objects for lookup
-                            data = _dict_compat(data)
-                            data = lib.fast_multiget(data,
-                                                     index.asobject.values,
-                                                     default=np.nan)
-                        else:
-                            data = np.nan
-                    # GH #12169
-                    elif isinstance(index, (PeriodIndex, TimedeltaIndex)):
-                        data = ([data.get(i, np.nan) for i in index]
-                                if data else np.nan)
-                    else:
-                        data = lib.fast_multiget(data, index.values,
-                                                 default=np.nan)
+                    data = index._get_values_from_dict(data)
                 except TypeError:
                     data = ([data.get(i, np.nan) for i in index]
                             if data else np.nan)
@@ -2245,14 +2230,14 @@ def unstack(self, level=-1, fill_value=None):
     # ----------------------------------------------------------------------
     # function application
 
-    def map(self, arg, na_action=None):
+    def map(self, mapper, na_action=None):
         """
         Map values of Series using input correspondence (which can be
         a dict, Series, or function)
 
         Parameters
         ----------
-        arg : function, dict, or Series
+        mapper : function, dict, or Series
         na_action : {None, 'ignore'}
             If 'ignore', propagate NA values, without passing them to the
             mapping function
@@ -2285,7 +2270,7 @@ def map(self, arg, na_action=None):
         two   bar
         three baz
 
-        If `arg` is a dictionary, return a new Series with values converted
+        If `mapper` is a dictionary, return a new Series with values converted
         according to the dictionary's mapping:
 
         >>> z = {1: 'A', 2: 'B', 3: 'C'}
@@ -2322,7 +2307,7 @@ def map(self, arg, na_action=None):
 
         Notes
         -----
-        When `arg` is a dictionary, values in Series that are not in the
+        When `mapper` is a dictionary, values in Series that are not in the
         dictionary (as keys) are converted to ``NaN``. However, if the
         dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
         provides a method for default values), then this default is used
@@ -2337,41 +2322,8 @@ def map(self, arg, na_action=None):
         3    0
         dtype: int64
         """
-
-        if is_extension_type(self.dtype):
-            values = self._values
-            if na_action is not None:
-                raise NotImplementedError
-            map_f = lambda values, f: values.map(f)
-        else:
-            values = self.asobject
-
-            if na_action == 'ignore':
-                def map_f(values, f):
-                    return lib.map_infer_mask(values, f,
-                                              isna(values).view(np.uint8))
-            else:
-                map_f = lib.map_infer
-
-        if isinstance(arg, dict):
-            if hasattr(arg, '__missing__'):
-                # If a dictionary subclass defines a default value method,
-                # convert arg to a lookup function (GH #15999).
-                dict_with_default = arg
-                arg = lambda x: dict_with_default[x]
-            else:
-                # Dictionary does not have a default. Thus it's safe to
-                # convert to an indexed series for efficiency.
-                arg = self._constructor(arg, index=arg.keys())
-
-        if isinstance(arg, Series):
-            # arg is a Series
-            indexer = arg.index.get_indexer(values)
-            new_values = algorithms.take_1d(arg._values, indexer)
-        else:
-            # arg is a function
-            new_values = map_f(values, arg)
-
+        new_values = super(Series, self)._map_values(
+            mapper, na_action=na_action)
         return self._constructor(new_values,
                                  index=self.index).__finalize__(self)