diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4ae3d9be04aa7..9f59ea2f2a1f0 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -75,7 +75,7 @@ Other API Changes - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) -- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`). +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`). - :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2696f9f94375d..f4332ac244af4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2910,7 +2910,10 @@ def map(self, mapper, na_action=None): from .multi import MultiIndex new_values = super(Index, self)._map_values( mapper, na_action=na_action) + attributes = self._get_attributes_dict() + + # we can return a MultiIndex if new_values.size and isinstance(new_values[0], tuple): if isinstance(self, MultiIndex): names = self.names @@ -2923,8 +2926,25 @@ def map(self, mapper, na_action=None): attributes['copy'] = False - # we infer the result types based on the - # returned values + # we want to try to return our original dtype + # ints infer to integer, but if we have + # uints, would prefer to return these + if is_unsigned_integer_dtype(self.dtype): + inferred = lib.infer_dtype(new_values) + if inferred == 'integer': + attributes['dtype'] = self.dtype + + elif not new_values.size: + # empty + attributes['dtype'] = self.dtype + elif isna(new_values).all(): + # all nan + inferred = lib.infer_dtype(self) + if inferred in ['datetime', 'datetime64', + 'timedelta', 'timedelta64', + 'period']: + new_values = [libts.NaT] * len(new_values) + return Index(new_values, **attributes) def isin(self, values, level=None): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ba7795d005721..99bdaf02e25ff 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1007,31 +1007,33 @@ def test_searchsorted_monotonic(self, indices): indices._searchsorted_monotonic(value, side='left') def test_map(self): + # callable index = self.create_index() + expected = index + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) - # From output of UInt64Index mapping can't infer that we - # shouldn't default to Int64 - if isinstance(index, UInt64Index): - expected = Index(index.values.tolist()) - else: - expected = index + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): - tm.assert_index_equal(index.map(lambda x: x), expected) + index = self.create_index() + if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(index))) - identity_dict = {x: x for x in index} - tm.assert_index_equal(index.map(identity_dict), expected) + expected = index - # Use values to work around MultiIndex instantiation of series - identity_series = Series(expected.values, index=index) - tm.assert_index_equal(index.map(identity_series), expected) + identity = mapper(index.values, index) + result = index.map(identity) + tm.assert_index_equal(result, expected) # empty mappable - nan_index = pd.Index([np.nan] * len(index)) - series_map = pd.Series() - tm.assert_index_equal(index.map(series_map), nan_index) - - dict_map = {} - tm.assert_index_equal(index.map(dict_map), nan_index) + expected = pd.Index([np.nan] * len(index)) + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) def test_putmask_with_wrong_mask(self): # GH18368 diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 839fccc1441e5..a01c60a47c0f9 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -1,7 +1,6 @@ """ generic datetimelike tests """ import pytest import pandas as pd -import numpy as np from .common import Base import pandas.util.testing as tm @@ -73,6 +72,6 @@ def test_map_dictlike(self, mapper): # empty map; these map to np.nan because we cannot know # to re-infer things - expected = pd.Index([np.nan] * len(self.index)) + expected = pd.Index([pd.NaT] * len(self.index)) result = self.index.map(mapper([], [])) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7dfd1511da292..372c11b296d9e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -852,11 +852,15 @@ def test_map_tseries_indices_return_index(self): exp = Index(range(24), name='hourly') tm.assert_index_equal(exp, date_index.map(lambda x: x.hour)) - def test_map_with_dict_and_series(self): + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): # GH 12756 expected = Index(['foo', 'bar', 'baz']) - mapper = Series(expected.values, index=[0, 1, 2]) - result = tm.makeIntIndex(3).map(mapper) + result = tm.makeIntIndex(3).map(mapper(expected.values, [0, 1, 2])) tm.assert_index_equal(result, expected) for name in self.indices.keys(): @@ -867,21 +871,16 @@ def test_map_with_dict_and_series(self): # Cannot map duplicated index continue - cur_index = self.indices[name] - expected = Index(np.arange(len(cur_index), 0, -1)) - mapper = pd.Series(expected, index=cur_index) - result = cur_index.map(mapper) - - tm.assert_index_equal(result, expected) + index = self.indices[name] + expected = Index(np.arange(len(index), 0, -1)) - # If the mapper is empty the expected index type is Int64Index - # but the output defaults to Float64 so I treat it independently - mapper = {o: n for o, n in - zip(cur_index, expected)} + # to match proper result coercion for uints + if name == 'uintIndex': + expected = expected.astype('uint64') + elif name == 'empty': + expected = Index([]) - result = cur_index.map(mapper) - if not mapper: - expected = Float64Index([]) + result = index.map(mapper(expected, index)) tm.assert_index_equal(result, expected) def test_map_with_non_function_missing_values(self): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 33ba0189d747a..7df189113247b 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -566,10 +566,6 @@ def test_repr_max_seq_item_setting(self): def test_repr_roundtrip(self): super(TestIntervalIndex, self).test_repr_roundtrip() - @pytest.mark.xfail(reason='get_indexer behavior does not currently work') - def test_map(self): - super(TestIntervalIndex, self).test_map() - def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)