Skip to content

Commit 68b66ab

Browse files
authored
COMPAT: map infers all-nan / empty correctly (pandas-dev#18491)
1 parent 5f7d86c commit 68b66ab

File tree

6 files changed

+59
-43
lines changed

6 files changed

+59
-43
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Other API Changes
7575
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
7676
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
7777
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
78-
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`).
78+
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`).
7979
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
8080
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
8181
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)

pandas/core/indexes/base.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -2910,7 +2910,10 @@ def map(self, mapper, na_action=None):
29102910
from .multi import MultiIndex
29112911
new_values = super(Index, self)._map_values(
29122912
mapper, na_action=na_action)
2913+
29132914
attributes = self._get_attributes_dict()
2915+
2916+
# we can return a MultiIndex
29142917
if new_values.size and isinstance(new_values[0], tuple):
29152918
if isinstance(self, MultiIndex):
29162919
names = self.names
@@ -2923,8 +2926,25 @@ def map(self, mapper, na_action=None):
29232926

29242927
attributes['copy'] = False
29252928

2926-
# we infer the result types based on the
2927-
# returned values
2929+
# we want to try to return our original dtype
2930+
# ints infer to integer, but if we have
2931+
# uints, would prefer to return these
2932+
if is_unsigned_integer_dtype(self.dtype):
2933+
inferred = lib.infer_dtype(new_values)
2934+
if inferred == 'integer':
2935+
attributes['dtype'] = self.dtype
2936+
2937+
elif not new_values.size:
2938+
# empty
2939+
attributes['dtype'] = self.dtype
2940+
elif isna(new_values).all():
2941+
# all nan
2942+
inferred = lib.infer_dtype(self)
2943+
if inferred in ['datetime', 'datetime64',
2944+
'timedelta', 'timedelta64',
2945+
'period']:
2946+
new_values = [libts.NaT] * len(new_values)
2947+
29282948
return Index(new_values, **attributes)
29292949

29302950
def isin(self, values, level=None):

pandas/tests/indexes/common.py

+20-18
Original file line numberDiff line numberDiff line change
@@ -1007,31 +1007,33 @@ def test_searchsorted_monotonic(self, indices):
10071007
indices._searchsorted_monotonic(value, side='left')
10081008

10091009
def test_map(self):
1010+
# callable
10101011
index = self.create_index()
1012+
expected = index
1013+
result = index.map(lambda x: x)
1014+
tm.assert_index_equal(result, expected)
10111015

1012-
# From output of UInt64Index mapping can't infer that we
1013-
# shouldn't default to Int64
1014-
if isinstance(index, UInt64Index):
1015-
expected = Index(index.values.tolist())
1016-
else:
1017-
expected = index
1016+
@pytest.mark.parametrize(
1017+
"mapper",
1018+
[
1019+
lambda values, index: {i: e for e, i in zip(values, index)},
1020+
lambda values, index: pd.Series(values, index)])
1021+
def test_map_dictlike(self, mapper):
10181022

1019-
tm.assert_index_equal(index.map(lambda x: x), expected)
1023+
index = self.create_index()
1024+
if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
1025+
pytest.skip("skipping tests for {}".format(type(index)))
10201026

1021-
identity_dict = {x: x for x in index}
1022-
tm.assert_index_equal(index.map(identity_dict), expected)
1027+
expected = index
10231028

1024-
# Use values to work around MultiIndex instantiation of series
1025-
identity_series = Series(expected.values, index=index)
1026-
tm.assert_index_equal(index.map(identity_series), expected)
1029+
identity = mapper(index.values, index)
1030+
result = index.map(identity)
1031+
tm.assert_index_equal(result, expected)
10271032

10281033
# empty mappable
1029-
nan_index = pd.Index([np.nan] * len(index))
1030-
series_map = pd.Series()
1031-
tm.assert_index_equal(index.map(series_map), nan_index)
1032-
1033-
dict_map = {}
1034-
tm.assert_index_equal(index.map(dict_map), nan_index)
1034+
expected = pd.Index([np.nan] * len(index))
1035+
result = index.map(mapper(expected, index))
1036+
tm.assert_index_equal(result, expected)
10351037

10361038
def test_putmask_with_wrong_mask(self):
10371039
# GH18368

pandas/tests/indexes/datetimelike.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
""" generic datetimelike tests """
22
import pytest
33
import pandas as pd
4-
import numpy as np
54
from .common import Base
65
import pandas.util.testing as tm
76

@@ -73,6 +72,6 @@ def test_map_dictlike(self, mapper):
7372

7473
# empty map; these map to np.nan because we cannot know
7574
# to re-infer things
76-
expected = pd.Index([np.nan] * len(self.index))
75+
expected = pd.Index([pd.NaT] * len(self.index))
7776
result = self.index.map(mapper([], []))
7877
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

+15-16
Original file line numberDiff line numberDiff line change
@@ -852,11 +852,15 @@ def test_map_tseries_indices_return_index(self):
852852
exp = Index(range(24), name='hourly')
853853
tm.assert_index_equal(exp, date_index.map(lambda x: x.hour))
854854

855-
def test_map_with_dict_and_series(self):
855+
@pytest.mark.parametrize(
856+
"mapper",
857+
[
858+
lambda values, index: {i: e for e, i in zip(values, index)},
859+
lambda values, index: pd.Series(values, index)])
860+
def test_map_dictlike(self, mapper):
856861
# GH 12756
857862
expected = Index(['foo', 'bar', 'baz'])
858-
mapper = Series(expected.values, index=[0, 1, 2])
859-
result = tm.makeIntIndex(3).map(mapper)
863+
result = tm.makeIntIndex(3).map(mapper(expected.values, [0, 1, 2]))
860864
tm.assert_index_equal(result, expected)
861865

862866
for name in self.indices.keys():
@@ -867,21 +871,16 @@ def test_map_with_dict_and_series(self):
867871
# Cannot map duplicated index
868872
continue
869873

870-
cur_index = self.indices[name]
871-
expected = Index(np.arange(len(cur_index), 0, -1))
872-
mapper = pd.Series(expected, index=cur_index)
873-
result = cur_index.map(mapper)
874-
875-
tm.assert_index_equal(result, expected)
874+
index = self.indices[name]
875+
expected = Index(np.arange(len(index), 0, -1))
876876

877-
# If the mapper is empty the expected index type is Int64Index
878-
# but the output defaults to Float64 so I treat it independently
879-
mapper = {o: n for o, n in
880-
zip(cur_index, expected)}
877+
# to match proper result coercion for uints
878+
if name == 'uintIndex':
879+
expected = expected.astype('uint64')
880+
elif name == 'empty':
881+
expected = Index([])
881882

882-
result = cur_index.map(mapper)
883-
if not mapper:
884-
expected = Float64Index([])
883+
result = index.map(mapper(expected, index))
885884
tm.assert_index_equal(result, expected)
886885

887886
def test_map_with_non_function_missing_values(self):

pandas/tests/indexes/test_interval.py

-4
Original file line numberDiff line numberDiff line change
@@ -584,10 +584,6 @@ def test_repr_max_seq_item_setting(self):
584584
def test_repr_roundtrip(self):
585585
super(TestIntervalIndex, self).test_repr_roundtrip()
586586

587-
@pytest.mark.xfail(reason='get_indexer behavior does not currently work')
588-
def test_map(self):
589-
super(TestIntervalIndex, self).test_map()
590-
591587
def test_get_item(self, closed):
592588
i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
593589
closed=closed)

0 commit comments

Comments
 (0)