diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 3c0e2869357ae..5e8cf3a0350bb 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -123,6 +123,30 @@ def time_series_dropna_datetime(self): self.s.dropna() +class series_map_dict(object): + goal_time = 0.2 + + def setup(self): + map_size = 1000 + self.s = Series(np.random.randint(0, map_size, 10000)) + self.map_dict = {i: map_size - i for i in range(map_size)} + + def time_series_map_dict(self): + self.s.map(self.map_dict) + + +class series_map_series(object): + goal_time = 0.2 + + def setup(self): + map_size = 1000 + self.s = Series(np.random.randint(0, map_size, 10000)) + self.map_series = Series(map_size - np.arange(map_size)) + + def time_series_map_series(self): + self.s.map(self.map_series) + + class series_clip(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 5549ba4e8f735..f97b958d553e0 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -75,6 +75,7 @@ Other API Changes - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`). - :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. 
(:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) @@ -108,6 +109,7 @@ Performance Improvements - Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) - :class`DateOffset` arithmetic performance is improved (:issue:`18218`) - Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`) +- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) - The overriden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) - ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) - Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) diff --git a/pandas/core/base.py b/pandas/core/base.py index 90fe350848bf7..cce0f384cb983 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -12,11 +12,12 @@ is_object_dtype, is_list_like, is_scalar, - is_datetimelike) + is_datetimelike, + is_extension_type) from pandas.util._validators import validate_bool_kwarg -from pandas.core import common as com +from pandas.core import common as com, algorithms import pandas.core.nanops as nanops import pandas._libs.lib as lib from pandas.compat.numpy import function as nv @@ -838,6 +839,78 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, klass=self.__class__.__name__, op=name)) return func(**kwds) + def _map_values(self, mapper, na_action=None): + """An internal function that maps values using the input + correspondence (which can be a dict, Series, or function). 
+ + Parameters + ---------- + mapper : function, dict, or Series + The input correspondence object + na_action : {None, 'ignore'} + If 'ignore', propagate NA values, without passing them to the + mapping function + + Returns + ------- + new_values : array-like + The mapped values, returned without wrapping them in an + Index or Series; ``Index.map`` and ``Series.map`` build + their own result object from this output. + + """ + + # we can fastpath dict/Series to an efficient map + # as we know that we are not going to have to yield + # python types + if isinstance(mapper, dict): + if hasattr(mapper, '__missing__'): + # If a dictionary subclass defines a default value method, + # convert mapper to a lookup function (GH #15999). + dict_with_default = mapper + mapper = lambda x: dict_with_default[x] + else: + # Dictionary does not have a default. Thus it's safe to + # convert to a Series for efficiency. + # we specify the keys here to handle the + # possibility that they are tuples + from pandas import Series + mapper = Series(mapper, index=mapper.keys()) + + if isinstance(mapper, ABCSeries): + # mapper is a Series here (passed in directly or built from + # a dict above), so look our values up in its index + if is_extension_type(self.dtype): + values = self._values + else: + values = self.values + + indexer = mapper.index.get_indexer(values) + new_values = algorithms.take_1d(mapper._values, indexer) + + return new_values + + # we must convert to python types + if is_extension_type(self.dtype): + values = self._values + if na_action is not None: + raise NotImplementedError + map_f = lambda values, f: values.map(f) + else: + values = self.astype(object) + values = getattr(values, 'values', values) + if na_action == 'ignore': + def map_f(values, f): + return lib.map_infer_mask(values, f, + isna(values).view(np.uint8)) + else: + map_f = lib.map_infer + + # mapper is a function + new_values = map_f(values, mapper) + + return new_values + def value_counts(self, 
normalize=False, sort=True, ascending=False, bins=None, dropna=True): """ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index bc8aacfe90170..a97b84ab9cc5b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1127,3 +1127,38 @@ def cast_scalar_to_array(shape, value, dtype=None): values.fill(fill_value) return values + + +def construct_1d_arraylike_from_scalar(value, length, dtype): + """ + create a np.ndarray / pandas type of specified shape and dtype + filled with values + + Parameters + ---------- + value : scalar value + length : int + dtype : pandas_dtype / np.dtype + + Returns + ------- + np.ndarray / pandas type of length, filled with value + + """ + if is_datetimetz(dtype): + from pandas import DatetimeIndex + subarr = DatetimeIndex([value] * length, dtype=dtype) + elif is_categorical_dtype(dtype): + from pandas import Categorical + subarr = Categorical([value] * length) + else: + if not isinstance(dtype, (np.dtype, type(np.dtype))): + dtype = dtype.dtype + + # coerce if we have nan for an integer dtype + if is_integer_dtype(dtype) and isna(value): + dtype = np.float64 + subarr = np.empty(length, dtype=dtype) + subarr.fill(value) + + return subarr diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 7cae536c5edd9..ce57b544d9d66 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -369,13 +369,14 @@ def _maybe_fill(arr, fill_value=np.nan): return arr -def na_value_for_dtype(dtype): +def na_value_for_dtype(dtype, compat=True): """ Return a dtype compat na value Parameters ---------- dtype : string / dtype + compat : boolean, default True Returns ------- @@ -389,7 +390,9 @@ def na_value_for_dtype(dtype): elif is_float_dtype(dtype): return np.nan elif is_integer_dtype(dtype): - return 0 + if compat: + return 0 + return np.nan elif is_bool_dtype(dtype): return False return np.nan diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 
af9e29a84b472..8a751f0204b60 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -13,7 +13,6 @@ from pandas.compat.numpy import function as nv from pandas import compat - from pandas.core.dtypes.generic import ( ABCSeries, ABCMultiIndex, @@ -2827,6 +2826,27 @@ def get_indexer_for(self, target, **kwargs): indexer, _ = self.get_indexer_non_unique(target, **kwargs) return indexer + _index_shared_docs['_get_values_from_dict'] = """ + Return the values of the input dictionary in the order the keys are + in the index. np.nan is returned for index values not in the + dictionary. + + Parameters + ---------- + data : dict + The dictionary from which to extract the values + + Returns + ------- + np.ndarray + + """ + + @Appender(_index_shared_docs['_get_values_from_dict']) + def _get_values_from_dict(self, data): + return lib.fast_multiget(data, self.values, + default=np.nan) + def _maybe_promote(self, other): # A hack, but it works from pandas.core.indexes.datetimes import DatetimeIndex @@ -2865,13 +2885,15 @@ def groupby(self, values): return result - def map(self, mapper): - """Apply mapper function to an index. + def map(self, mapper, na_action=None): + """Map values of Index using input correspondence Parameters ---------- - mapper : callable - Function to be applied. + mapper : function, dict, or Series + na_action : {None, 'ignore'} + If 'ignore', propagate NA values, without passing them to the + mapping function Returns ------- @@ -2881,15 +2903,26 @@ def map(self, mapper): a MultiIndex will be returned. 
""" + from .multi import MultiIndex - mapped_values = self._arrmap(self.values, mapper) + new_values = super(Index, self)._map_values( + mapper, na_action=na_action) attributes = self._get_attributes_dict() - if mapped_values.size and isinstance(mapped_values[0], tuple): - return MultiIndex.from_tuples(mapped_values, - names=attributes.get('name')) + if new_values.size and isinstance(new_values[0], tuple): + if isinstance(self, MultiIndex): + names = self.names + elif attributes.get('name'): + names = [attributes.get('name')] * len(new_values[0]) + else: + names = None + return MultiIndex.from_tuples(new_values, + names=names) attributes['copy'] = False - return Index(mapped_values, **attributes) + + # we infer the result types based on the + # returned values + return Index(new_values, **attributes) def isin(self, values, level=None): """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 4934ccb49b844..5643d886a4fec 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -136,7 +136,7 @@ def equals(self, other): elif not isinstance(other, type(self)): try: other = type(self)(other) - except: + except Exception: return False if not is_dtype_equal(self.dtype, other.dtype): @@ -352,7 +352,7 @@ def map(self, f): # Try to use this result if we can if isinstance(result, np.ndarray): - self._shallow_copy(result) + result = Index(result) if not isinstance(result, Index): raise TypeError('The map function must return an Index object') @@ -698,6 +698,14 @@ def __rsub__(self, other): def _add_delta(self, other): return NotImplemented + @Appender(_index_shared_docs['_get_values_from_dict']) + def _get_values_from_dict(self, data): + if len(data): + return np.array([data.get(i, np.nan) + for i in self.asobject.values]) + + return np.array([np.nan]) + def _add_delta_td(self, other): # add a delta of a timedeltalike # return the i8 result view diff --git a/pandas/core/indexes/datetimes.py 
b/pandas/core/indexes/datetimes.py index 111ba0c92aa9b..e1def38289243 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1457,6 +1457,17 @@ def get_value_maybe_box(self, series, key): key, tz=self.tz) return _maybe_box(self, values, series, key) + @Appender(_index_shared_docs['_get_values_from_dict']) + def _get_values_from_dict(self, data): + if len(data): + # coerce back to datetime objects for lookup + data = com._dict_compat(data) + return lib.fast_multiget(data, + self.asobject.values, + default=np.nan) + + return np.array([np.nan]) + def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label diff --git a/pandas/core/series.py b/pandas/core/series.py index d7833526c0408..bff7c21ad69b1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -18,7 +18,7 @@ is_bool, is_integer, is_integer_dtype, is_float_dtype, - is_extension_type, is_datetimetz, + is_extension_type, is_datetime64tz_dtype, is_timedelta64_dtype, is_list_like, @@ -34,7 +34,8 @@ from pandas.core.dtypes.cast import ( maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, - maybe_cast_to_datetime, maybe_castable) + maybe_cast_to_datetime, maybe_castable, + construct_1d_arraylike_from_scalar) from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike from pandas.core.common import (is_bool_indexer, @@ -45,7 +46,6 @@ _maybe_match_name, SettingWithCopyError, _maybe_box_datetimelike, - _dict_compat, standardize_mapping, _any_none) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, @@ -203,23 +203,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None, index = Index(data) else: index = Index(_try_sort(data)) + try: - if isinstance(index, DatetimeIndex): - if len(data): - # coerce back to datetime objects for lookup - data = _dict_compat(data) - data = lib.fast_multiget(data, - index.asobject.values, - default=np.nan) - else: - data = np.nan - # GH #12169 - elif 
isinstance(index, (PeriodIndex, TimedeltaIndex)): - data = ([data.get(i, np.nan) for i in index] - if data else np.nan) - else: - data = lib.fast_multiget(data, index.values, - default=np.nan) + data = index._get_values_from_dict(data) except TypeError: data = ([data.get(i, np.nan) for i in index] if data else np.nan) @@ -2338,41 +2324,8 @@ def map(self, arg, na_action=None): 3 0 dtype: int64 """ - - if is_extension_type(self.dtype): - values = self._values - if na_action is not None: - raise NotImplementedError - map_f = lambda values, f: values.map(f) - else: - values = self.asobject - - if na_action == 'ignore': - def map_f(values, f): - return lib.map_infer_mask(values, f, - isna(values).view(np.uint8)) - else: - map_f = lib.map_infer - - if isinstance(arg, dict): - if hasattr(arg, '__missing__'): - # If a dictionary subclass defines a default value method, - # convert arg to a lookup function (GH #15999). - dict_with_default = arg - arg = lambda x: dict_with_default[x] - else: - # Dictionary does not have a default. Thus it's safe to - # convert to an indexed series for efficiency. 
- arg = self._constructor(arg, index=arg.keys()) - - if isinstance(arg, Series): - # arg is a Series - indexer = arg.index.get_indexer(values) - new_values = algorithms.take_1d(arg._values, indexer) - else: - # arg is a function - new_values = map_f(values, arg) - + new_values = super(Series, self)._map_values( + arg, na_action=na_action) return self._constructor(new_values, index=self.index).__finalize__(self) @@ -3248,21 +3201,6 @@ def _try_cast(arr, take_fast_path): else: subarr = _try_cast(data, False) - def create_from_value(value, index, dtype): - # return a new empty value suitable for the dtype - - if is_datetimetz(dtype): - subarr = DatetimeIndex([value] * len(index), dtype=dtype) - elif is_categorical_dtype(dtype): - subarr = Categorical([value] * len(index)) - else: - if not isinstance(dtype, (np.dtype, type(np.dtype))): - dtype = dtype.dtype - subarr = np.empty(len(index), dtype=dtype) - subarr.fill(value) - - return subarr - # scalar like, GH if getattr(subarr, 'ndim', 0) == 0: if isinstance(data, list): # pragma: no cover @@ -3277,7 +3215,8 @@ def create_from_value(value, index, dtype): # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) - subarr = create_from_value(value, index, dtype) + subarr = construct_1d_arraylike_from_scalar( + value, len(index), dtype) else: return subarr.item() @@ -3288,8 +3227,8 @@ def create_from_value(value, index, dtype): # a 1-element ndarray if len(subarr) != len(index) and len(subarr) == 1: - subarr = create_from_value(subarr[0], index, - subarr.dtype) + subarr = construct_1d_arraylike_from_scalar( + subarr[0], len(index), subarr.dtype) elif subarr.ndim > 1: if isinstance(data, np.ndarray): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 43b20f420eb48..ee6434431bcfc 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1005,3 +1005,30 @@ def test_searchsorted_monotonic(self, indices): # non-monotonic should raise. 
with pytest.raises(ValueError): indices._searchsorted_monotonic(value, side='left') + + def test_map(self): + index = self.create_index() + + # From output of UInt64Index mapping can't infer that we + # shouldn't default to Int64 + if isinstance(index, UInt64Index): + expected = Index(index.values.tolist()) + else: + expected = index + + tm.assert_index_equal(index.map(lambda x: x), expected) + + identity_dict = {x: x for x in index} + tm.assert_index_equal(index.map(identity_dict), expected) + + # Use values to work around MultiIndex instantiation of series + identity_series = Series(expected.values, index=index) + tm.assert_index_equal(index.map(identity_series), expected) + + # empty mappable + nan_index = pd.Index([np.nan] * len(index)) + series_map = pd.Series() + tm.assert_index_equal(index.map(series_map), nan_index) + + dict_map = {} + tm.assert_index_equal(index.map(dict_map), nan_index) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 12b509d4aef3f..839fccc1441e5 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -1,5 +1,7 @@ """ generic datetimelike tests """ - +import pytest +import pandas as pd +import numpy as np from .common import Base import pandas.util.testing as tm @@ -38,3 +40,39 @@ def test_view(self, indices): i_view = i.view(self._holder) result = self._holder(i) tm.assert_index_equal(result, i_view) + + def test_map_callable(self): + + expected = self.index + 1 + result = self.index.map(lambda x: x + 1) + tm.assert_index_equal(result, expected) + + # map to NaT + result = self.index.map(lambda x: pd.NaT if x == self.index[0] else x) + expected = pd.Index([pd.NaT] + self.index[1:].tolist()) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): + expected = self.index + 
1 + + # don't compare the freqs + if isinstance(expected, pd.DatetimeIndex): + expected.freq = None + + result = self.index.map(mapper(expected, self.index)) + tm.assert_index_equal(result, expected) + + expected = pd.Index([pd.NaT] + self.index[1:].tolist()) + result = self.index.map(mapper(expected, self.index)) + tm.assert_index_equal(result, expected) + + # empty map; these map to np.nan because we cannot know + # to re-infer things + expected = pd.Index([np.nan] * len(self.index)) + result = self.index.map(mapper([], [])) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 52558c27ce707..9d5746e07814e 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -683,11 +683,9 @@ def test_pickle_freq(self): assert new_prng.freqstr == 'M' def test_map(self): - index = PeriodIndex([2005, 2007, 2009], freq='A') - result = index.map(lambda x: x + 1) - expected = index + 1 - tm.assert_index_equal(result, expected) + # test_map_dictlike generally tests + index = PeriodIndex([2005, 2007, 2009], freq='A') result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 99a99cc5cc3eb..f5016e6d19a57 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -4,6 +4,8 @@ from datetime import datetime, timedelta +from collections import defaultdict + import pandas.util.testing as tm from pandas.core.dtypes.common import is_unsigned_integer_dtype from pandas.core.indexes.api import Index, MultiIndex @@ -844,6 +846,64 @@ def test_map_tseries_indices_return_index(self): exp = Index(range(24), name='hourly') tm.assert_index_equal(exp, date_index.map(lambda x: x.hour)) + def test_map_with_dict_and_series(self): + # GH 12756 + expected = Index(['foo', 'bar', 'baz']) + 
mapper = Series(expected.values, index=[0, 1, 2]) + result = tm.makeIntIndex(3).map(mapper) + tm.assert_index_equal(result, expected) + + for name in self.indices.keys(): + if name == 'catIndex': + # Tested in test_categorical + continue + elif name == 'repeats': + # Cannot map duplicated index + continue + + cur_index = self.indices[name] + expected = Index(np.arange(len(cur_index), 0, -1)) + mapper = pd.Series(expected, index=cur_index) + result = cur_index.map(mapper) + + tm.assert_index_equal(result, expected) + + # If the mapper is empty the expected index type is Int64Index + # but the output defaults to Float64 so I treat it independently + mapper = {o: n for o, n in + zip(cur_index, expected)} + + result = cur_index.map(mapper) + if not mapper: + expected = Float64Index([]) + tm.assert_index_equal(result, expected) + + def test_map_with_non_function_missing_values(self): + # GH 12756 + expected = Index([2., np.nan, 'foo']) + input = Index([2, 1, 0]) + + mapper = Series(['foo', 2., 'baz'], index=[0, 2, -1]) + tm.assert_index_equal(expected, input.map(mapper)) + + mapper = {0: 'foo', 2: 2.0, -1: 'baz'} + tm.assert_index_equal(expected, input.map(mapper)) + + def test_map_na_exclusion(self): + idx = Index([1.5, np.nan, 3, np.nan, 5]) + + result = idx.map(lambda x: x * 2, na_action='ignore') + exp = idx * 2 + tm.assert_index_equal(result, exp) + + def test_map_defaultdict(self): + idx = Index([1, 2, 3]) + default_dict = defaultdict(lambda: 'blank') + default_dict[1] = 'stuff' + result = idx.map(default_dict) + expected = Index(['stuff', 'blank', 'blank']) + tm.assert_index_equal(result, expected) + def test_append_multiple(self): index = Index(['a', 'b', 'c', 'd', 'e', 'f']) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 5e6898f9c8711..92d5a53f6570b 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -269,6 +269,24 @@ def f(x): ordered=False) 
tm.assert_index_equal(result, exp) + result = ci.map(pd.Series([10, 20, 30], index=['A', 'B', 'C'])) + tm.assert_index_equal(result, exp) + + result = ci.map({'A': 10, 'B': 20, 'C': 30}) + tm.assert_index_equal(result, exp) + + def test_map_with_categorical_series(self): + # GH 12756 + a = pd.Index([1, 2, 3, 4]) + b = pd.Series(["even", "odd", "even", "odd"], + dtype="category") + c = pd.Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = pd.Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) def test_where(self, klass): i = self.create_index() diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 7d6f544f6d533..b17d241ff50e0 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -530,6 +530,10 @@ def test_repr_max_seq_item_setting(self): def test_repr_roundtrip(self): super(TestIntervalIndex, self).test_repr_roundtrip() + @pytest.mark.xfail(reason='get_indexer behavior does not currently work') + def test_map(self): + super(TestIntervalIndex, self).test_map() + def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 533b06088f1bf..e25384ebf7d62 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -187,6 +187,7 @@ def test_misc_coverage(self): assert not idx.equals(list(non_td)) def test_map(self): + # test_map_dictlike generally tests rng = timedelta_range('1 day', periods=10) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 2c93d2afd1760..22b3fd9073bab 100644 --- 
a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -439,3 +439,21 @@ def test_indexing_with_category(self): res = (cat[['A']] == 'foo') tm.assert_frame_equal(res, exp) + + def test_map_with_dict_or_series(self): + orig_values = ['a', 'B', 1, 'a'] + new_values = ['one', 2, 3.0, 'one'] + cur_index = pd.CategoricalIndex(orig_values, name='XXX') + expected = pd.CategoricalIndex(new_values, + name='XXX', categories=[3.0, 2, 'one']) + + mapper = pd.Series(new_values[:-1], index=orig_values[:-1]) + output = cur_index.map(mapper) + # Order of categories in output can be different + tm.assert_index_equal(expected, output) + + mapper = {o: n for o, n in + zip(orig_values[:-1], new_values[:-1])} + output = cur_index.map(mapper) + # Order of categories in output can be different + tm.assert_index_equal(expected, output) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index d0693984689a6..fe21ba569ae99 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -424,6 +424,7 @@ def test_map_dict_with_tuple_keys(self): """ df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]}) label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'} + df['labels'] = df['a'].map(label_mappings) df['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'], index=df.index) # All labels should be filled now