Skip to content

COMPAT: map infers all-nan / empty correctly #18491

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Other API Changes
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`).
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`).
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
Expand Down
24 changes: 22 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2910,7 +2910,10 @@ def map(self, mapper, na_action=None):
from .multi import MultiIndex
new_values = super(Index, self)._map_values(
mapper, na_action=na_action)

attributes = self._get_attributes_dict()

# we can return a MultiIndex
if new_values.size and isinstance(new_values[0], tuple):
if isinstance(self, MultiIndex):
names = self.names
Expand All @@ -2923,8 +2926,25 @@ def map(self, mapper, na_action=None):

attributes['copy'] = False

# we infer the result types based on the
# returned values
# we want to try to return our original dtype
# ints infer to integer, but if we have
# uints, would prefer to return these
if is_unsigned_integer_dtype(self.dtype):
inferred = lib.infer_dtype(new_values)
if inferred == 'integer':
attributes['dtype'] = self.dtype

elif not new_values.size:
# empty
attributes['dtype'] = self.dtype
elif isna(new_values).all():
# all nan
inferred = lib.infer_dtype(self)
if inferred in ['datetime', 'datetime64',
'timedelta', 'timedelta64',
'period']:
new_values = [libts.NaT] * len(new_values)

return Index(new_values, **attributes)

def isin(self, values, level=None):
Expand Down
38 changes: 20 additions & 18 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,31 +1007,33 @@ def test_searchsorted_monotonic(self, indices):
indices._searchsorted_monotonic(value, side='left')

def test_map(self):
# callable
index = self.create_index()
expected = index
result = index.map(lambda x: x)
tm.assert_index_equal(result, expected)

# From output of UInt64Index mapping can't infer that we
# shouldn't default to Int64
if isinstance(index, UInt64Index):
expected = Index(index.values.tolist())
else:
expected = index
@pytest.mark.parametrize(
"mapper",
[
lambda values, index: {i: e for e, i in zip(values, index)},
lambda values, index: pd.Series(values, index)])
def test_map_dictlike(self, mapper):

tm.assert_index_equal(index.map(lambda x: x), expected)
index = self.create_index()
if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
pytest.skip("skipping tests for {}".format(type(index)))

identity_dict = {x: x for x in index}
tm.assert_index_equal(index.map(identity_dict), expected)
expected = index

# Use values to work around MultiIndex instantiation of series
identity_series = Series(expected.values, index=index)
tm.assert_index_equal(index.map(identity_series), expected)
identity = mapper(index.values, index)
result = index.map(identity)
tm.assert_index_equal(result, expected)

# empty mappable
nan_index = pd.Index([np.nan] * len(index))
series_map = pd.Series()
tm.assert_index_equal(index.map(series_map), nan_index)

dict_map = {}
tm.assert_index_equal(index.map(dict_map), nan_index)
expected = pd.Index([np.nan] * len(index))
result = index.map(mapper(expected, index))
tm.assert_index_equal(result, expected)

def test_putmask_with_wrong_mask(self):
# GH18368
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
""" generic datetimelike tests """
import pytest
import pandas as pd
import numpy as np
from .common import Base
import pandas.util.testing as tm

Expand Down Expand Up @@ -73,6 +72,6 @@ def test_map_dictlike(self, mapper):

# empty map; every key is missing, so the result is all-NA, and for a
# datetimelike index the missing values are filled with NaT
expected = pd.Index([np.nan] * len(self.index))
expected = pd.Index([pd.NaT] * len(self.index))
result = self.index.map(mapper([], []))
tm.assert_index_equal(result, expected)
31 changes: 15 additions & 16 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,11 +852,15 @@ def test_map_tseries_indices_return_index(self):
exp = Index(range(24), name='hourly')
tm.assert_index_equal(exp, date_index.map(lambda x: x.hour))

def test_map_with_dict_and_series(self):
@pytest.mark.parametrize(
"mapper",
[
lambda values, index: {i: e for e, i in zip(values, index)},
lambda values, index: pd.Series(values, index)])
def test_map_dictlike(self, mapper):
# GH 12756
expected = Index(['foo', 'bar', 'baz'])
mapper = Series(expected.values, index=[0, 1, 2])
result = tm.makeIntIndex(3).map(mapper)
result = tm.makeIntIndex(3).map(mapper(expected.values, [0, 1, 2]))
tm.assert_index_equal(result, expected)

for name in self.indices.keys():
Expand All @@ -867,21 +871,16 @@ def test_map_with_dict_and_series(self):
# Cannot map duplicated index
continue

cur_index = self.indices[name]
expected = Index(np.arange(len(cur_index), 0, -1))
mapper = pd.Series(expected, index=cur_index)
result = cur_index.map(mapper)

tm.assert_index_equal(result, expected)
index = self.indices[name]
expected = Index(np.arange(len(index), 0, -1))

# If the mapper is empty the expected index type is Int64Index
# but the output defaults to Float64 so I treat it independently
mapper = {o: n for o, n in
zip(cur_index, expected)}
# to match proper result coercion for uints
if name == 'uintIndex':
expected = expected.astype('uint64')
elif name == 'empty':
expected = Index([])

result = cur_index.map(mapper)
if not mapper:
expected = Float64Index([])
result = index.map(mapper(expected, index))
tm.assert_index_equal(result, expected)

def test_map_with_non_function_missing_values(self):
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/indexes/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,10 +566,6 @@ def test_repr_max_seq_item_setting(self):
def test_repr_roundtrip(self):
super(TestIntervalIndex, self).test_repr_roundtrip()

@pytest.mark.xfail(reason='get_indexer behavior does not currently work')
def test_map(self):
super(TestIntervalIndex, self).test_map()

def test_get_item(self, closed):
i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
closed=closed)
Expand Down