From 4dfe39ab9a9c5177661ee51854ebb5453b94a5b6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Nov 2017 17:26:06 -0500 Subject: [PATCH 1/5] TST: test_map parametrize xref #18482 --- pandas/tests/indexes/common.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ba7795d005721..f194158ce1323 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1018,20 +1018,32 @@ def test_map(self): tm.assert_index_equal(index.map(lambda x: x), expected) - identity_dict = {x: x for x in index} - tm.assert_index_equal(index.map(identity_dict), expected) + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): - # Use values to work around MultiIndex instantiation of series - identity_series = Series(expected.values, index=index) - tm.assert_index_equal(index.map(identity_series), expected) + index = self.create_index() + if isinstance(index, pd.CategoricalIndex): + pytest.skip("tested in test_categorical") - # empty mappable - nan_index = pd.Index([np.nan] * len(index)) - series_map = pd.Series() - tm.assert_index_equal(index.map(series_map), nan_index) + # From output of UInt64Index mapping can't infer that we + # shouldn't default to Int64 + if isinstance(index, UInt64Index): + expected = Index(index.values.tolist()) + else: + expected = index + + identity = mapper({x: x for x in index}, index) + result = index.map(identity) + tm.assert_index_equal(result, expected) - dict_map = {} - tm.assert_index_equal(index.map(dict_map), nan_index) + # empty mappable + expected = pd.Index([np.nan] * len(index)) + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) def test_putmask_with_wrong_mask(self): # GH18368 From 6260bda3d7c5a4d5b3fba77351b6f544aebad234 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Nov 2017 17:49:38 -0500 Subject: [PATCH 2/5] fix uints --- pandas/core/indexes/base.py | 11 +++++++++-- pandas/tests/indexes/common.py | 21 +++++---------------- pandas/tests/indexes/test_base.py | 31 +++++++++++++++---------------- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2696f9f94375d..3e886be94c5f5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2910,6 +2910,7 @@ def map(self, mapper, na_action=None): from .multi import MultiIndex new_values = super(Index, self)._map_values( mapper, na_action=na_action) + attributes = self._get_attributes_dict() if new_values.size and isinstance(new_values[0], tuple): if isinstance(self, MultiIndex): @@ -2923,8 +2924,14 @@ def map(self, mapper, na_action=None): attributes['copy'] = False - # we infer the result types based on the - # returned values + # we want to try to return our original dtype + # ints infer to integer, but if we have + # uints, would prefer to return these + if is_unsigned_integer_dtype(self.dtype): + inferred = lib.infer_dtype(new_values) + if inferred == 'integer': + attributes['dtype'] = self.dtype + return Index(new_values, **attributes) def isin(self, values, level=None): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index f194158ce1323..eea96c383d85f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1007,16 +1007,11 @@ def test_searchsorted_monotonic(self, indices): indices._searchsorted_monotonic(value, side='left') def test_map(self): + # callable index = self.create_index() - - # From output of UInt64Index mapping can't infer that we - # shouldn't default to Int64 - if isinstance(index, UInt64Index): - expected = Index(index.values.tolist()) - else: - expected = index - - tm.assert_index_equal(index.map(lambda x: x), expected) + expected = index + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "mapper", @@ -1028,13 +1023,7 @@ def test_map_dictlike(self, mapper): index = self.create_index() if isinstance(index, pd.CategoricalIndex): pytest.skip("tested in test_categorical") - - # From output of UInt64Index mapping can't infer that we - # shouldn't default to Int64 - if isinstance(index, UInt64Index): - expected = Index(index.values.tolist()) - else: - expected = index + expected = index identity = mapper({x: x for x in index}, index) result = index.map(identity) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7dfd1511da292..c3cfa7ba807b8 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -852,11 +852,15 @@ def test_map_tseries_indices_return_index(self): exp = Index(range(24), name='hourly') tm.assert_index_equal(exp, date_index.map(lambda x: x.hour)) - def test_map_with_dict_and_series(self): + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): # GH 12756 expected = Index(['foo', 'bar', 'baz']) - mapper = Series(expected.values, index=[0, 1, 2]) - result = tm.makeIntIndex(3).map(mapper) + result = tm.makeIntIndex(3).map(mapper(expected.values, [0, 1, 2])) tm.assert_index_equal(result, expected) for name in self.indices.keys(): @@ -867,21 +871,16 @@ def test_map_with_dict_and_series(self): # Cannot map duplicated index continue - cur_index = self.indices[name] - expected = Index(np.arange(len(cur_index), 0, -1)) - mapper = pd.Series(expected, index=cur_index) - result = cur_index.map(mapper) - - tm.assert_index_equal(result, expected) - - # If the mapper is empty the expected index type is Int64Index - # but the output defaults to Float64 so I treat it independently - mapper = {o: n for o, n in - zip(cur_index, expected)} + index = self.indices[name] + expected = Index(np.arange(len(index), 0, -1)) - result = cur_index.map(mapper) - if not mapper: + # to match proper result coercion for uints + if name == 'uintIndex': + expected = expected.astype('uint64') + elif name == 'empty': expected = Float64Index([]) + + result = index.map(mapper(expected, index)) tm.assert_index_equal(result, expected) def test_map_with_non_function_missing_values(self): From 689029968db33c2ecca97d7be6bd6c5169189481 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Nov 2017 18:01:29 -0500 Subject: [PATCH 3/5] all-nan inferred to correct dtype --- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/core/indexes/base.py | 13 +++++++++++++ pandas/tests/indexes/datetimelike.py | 3 +-- pandas/tests/indexes/test_base.py | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4ae3d9be04aa7..9f59ea2f2a1f0 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -75,7 +75,7 @@ Other API Changes - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) -- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`). +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`). - :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3e886be94c5f5..f4332ac244af4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2912,6 +2912,8 @@ def map(self, mapper, na_action=None): mapper, na_action=na_action) attributes = self._get_attributes_dict() + + # we can return a MultiIndex if new_values.size and isinstance(new_values[0], tuple): if isinstance(self, MultiIndex): names = self.names @@ -2932,6 +2934,17 @@ def map(self, mapper, na_action=None): if inferred == 'integer': attributes['dtype'] = self.dtype + elif not new_values.size: + # empty + attributes['dtype'] = self.dtype + elif isna(new_values).all(): + # all nan + inferred = lib.infer_dtype(self) + if inferred in ['datetime', 'datetime64', + 'timedelta', 'timedelta64', + 'period']: + new_values = [libts.NaT] * len(new_values) + return Index(new_values, **attributes) def isin(self, values, level=None): diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 839fccc1441e5..a01c60a47c0f9 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -1,7 +1,6 @@ """ generic datetimelike tests """ import pytest import pandas as pd -import numpy as np from .common import Base import pandas.util.testing as tm @@ -73,6 +72,6 @@ def test_map_dictlike(self, mapper): # empty map; these map to np.nan because we cannot know # to re-infer things - expected = pd.Index([np.nan] * len(self.index)) + expected = pd.Index([pd.NaT] * len(self.index)) result = self.index.map(mapper([], [])) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c3cfa7ba807b8..372c11b296d9e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -878,7 +878,7 @@ def test_map_dictlike(self, mapper): if name == 'uintIndex': expected = expected.astype('uint64') elif name == 'empty': - expected = Float64Index([]) + expected = Index([]) result = index.map(mapper(expected, index)) tm.assert_index_equal(result, expected) From 7e19fd2371fd49f80bea0749f7d0f6d7aa566142 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Nov 2017 19:04:49 -0500 Subject: [PATCH 4/5] don't use a double dict --- pandas/tests/indexes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index eea96c383d85f..d63cca6abc983 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1025,7 +1025,7 @@ def test_map_dictlike(self, mapper): pytest.skip("tested in test_categorical") expected = index - identity = mapper({x: x for x in index}, index) + identity = mapper(index.values, index) result = index.map(identity) tm.assert_index_equal(result, expected) From 9447d2ab7ff8d00d8a71a91051c243795279f4fc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Nov 2017 21:19:56 -0500 Subject: [PATCH 5/5] skip interval tests --- pandas/tests/indexes/common.py | 5 +++-- pandas/tests/indexes/test_interval.py | 4 ---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index d63cca6abc983..99bdaf02e25ff 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1021,8 +1021,9 @@ def test_map(self): def test_map_dictlike(self, mapper): index = self.create_index() - if isinstance(index, pd.CategoricalIndex): - pytest.skip("tested in test_categorical") + if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(index))) + expected = index identity = mapper(index.values, index) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 33ba0189d747a..7df189113247b 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -566,10 +566,6 @@ def test_repr_max_seq_item_setting(self): def test_repr_roundtrip(self): super(TestIntervalIndex, self).test_repr_roundtrip() - @pytest.mark.xfail(reason='get_indexer behavior does not currently work') - def test_map(self): - super(TestIntervalIndex, self).test_map() - def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)