Skip to content

Commit e1ba19a

Browse files
authored
API: empty map should not infer (#18517)
closes #18509
1 parent d163de7 commit e1ba19a

File tree

8 files changed

+42
-37
lines changed

8 files changed

+42
-37
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ Other API Changes
107107
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
108108
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
109109
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
110-
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`).
110+
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`).
111111
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
112112
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
113113
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)

pandas/core/indexes/base.py

+1-17
Original file line numberDiff line numberDiff line change
@@ -2897,25 +2897,9 @@ def map(self, mapper, na_action=None):
28972897
names=names)
28982898

28992899
attributes['copy'] = False
2900-
2901-
# we want to try to return our original dtype
2902-
# ints infer to integer, but if we have
2903-
# uints, would prefer to return these
2904-
if is_unsigned_integer_dtype(self.dtype):
2905-
inferred = lib.infer_dtype(new_values)
2906-
if inferred == 'integer':
2907-
attributes['dtype'] = self.dtype
2908-
2909-
elif not new_values.size:
2900+
if not new_values.size:
29102901
# empty
29112902
attributes['dtype'] = self.dtype
2912-
elif isna(new_values).all():
2913-
# all nan
2914-
inferred = lib.infer_dtype(self)
2915-
if inferred in ['datetime', 'datetime64',
2916-
'timedelta', 'timedelta64',
2917-
'period']:
2918-
new_values = [libts.NaT] * len(new_values)
29192903

29202904
return Index(new_values, **attributes)
29212905

pandas/tests/indexes/common.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,13 @@ def test_searchsorted_monotonic(self, indices):
10091009
def test_map(self):
10101010
# callable
10111011
index = self.create_index()
1012-
expected = index
1012+
1013+
# we don't infer UInt64
1014+
if isinstance(index, pd.UInt64Index):
1015+
expected = index.astype('int64')
1016+
else:
1017+
expected = index
1018+
10131019
result = index.map(lambda x: x)
10141020
tm.assert_index_equal(result, expected)
10151021

@@ -1024,9 +1030,14 @@ def test_map_dictlike(self, mapper):
10241030
if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
10251031
pytest.skip("skipping tests for {}".format(type(index)))
10261032

1027-
expected = index
1028-
10291033
identity = mapper(index.values, index)
1034+
1035+
# we don't infer to UInt64 for a dict
1036+
if isinstance(index, pd.UInt64Index) and isinstance(identity, dict):
1037+
expected = index.astype('int64')
1038+
else:
1039+
expected = index
1040+
10301041
result = index.map(identity)
10311042
tm.assert_index_equal(result, expected)
10321043

pandas/tests/indexes/datetimelike.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" generic datetimelike tests """
22
import pytest
3+
import numpy as np
34
import pandas as pd
45
from .common import Base
56
import pandas.util.testing as tm
@@ -72,6 +73,6 @@ def test_map_dictlike(self, mapper):
7273

7374
# empty map; these map to np.nan because we cannot know
7475
# to re-infer things
75-
expected = pd.Index([pd.NaT] * len(self.index))
76+
expected = pd.Index([np.nan] * len(self.index))
7677
result = self.index.map(mapper([], []))
7778
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -885,9 +885,7 @@ def test_map_dictlike(self, mapper):
885885
expected = Index(np.arange(len(index), 0, -1))
886886

887887
# to match proper result coercion for uints
888-
if name == 'uintIndex':
889-
expected = expected.astype('uint64')
890-
elif name == 'empty':
888+
if name == 'empty':
891889
expected = Index([])
892890

893891
result = index.map(mapper(expected, index))

pandas/tests/series/test_apply.py

+8
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,14 @@ def test_map(self):
377377
exp = Series([np.nan, 'B', 'C', 'D'])
378378
tm.assert_series_equal(a.map(c), exp)
379379

380+
@pytest.mark.parametrize("index", tm.all_index_generator(10))
381+
def test_map_empty(self, index):
382+
s = Series(index)
383+
result = s.map({})
384+
385+
expected = pd.Series(np.nan, index=s.index)
386+
tm.assert_series_equal(result, expected)
387+
380388
def test_map_compat(self):
381389
# related GH 8024
382390
s = Series([True, True, False], index=[1, 2, 3])

pandas/tests/test_resample.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -816,21 +816,23 @@ def test_resample_empty_dataframe(self):
816816

817817
# test size for GH13212 (currently stays as df)
818818

819-
def test_resample_empty_dtypes(self):
819+
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
820+
@pytest.mark.parametrize(
821+
"dtype",
822+
[np.float, np.int, np.object, 'datetime64[ns]'])
823+
def test_resample_empty_dtypes(self, index, dtype):
820824

821825
# Empty series were sometimes causing a segfault (for the functions
822826
# with Cython bounds-checking disabled) or an IndexError. We just run
823827
# them to ensure they no longer do. (GH #10228)
824-
for index in tm.all_timeseries_index_generator(0):
825-
for dtype in (np.float, np.int, np.object, 'datetime64[ns]'):
826-
for how in downsample_methods + upsample_methods:
827-
empty_series = Series([], index, dtype)
828-
try:
829-
getattr(empty_series.resample('d'), how)()
830-
except DataError:
831-
# Ignore these since some combinations are invalid
832-
# (ex: doing mean with dtype of np.object)
833-
pass
828+
for how in downsample_methods + upsample_methods:
829+
empty_series = Series([], index, dtype)
830+
try:
831+
getattr(empty_series.resample('d'), how)()
832+
except DataError:
833+
# Ignore these since some combinations are invalid
834+
# (ex: doing mean with dtype of np.object)
835+
pass
834836

835837
def test_resample_loffset_arg_type(self):
836838
# GH 13218, 15002

pandas/util/testing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1695,7 +1695,8 @@ def all_index_generator(k=10):
16951695
"""
16961696
all_make_index_funcs = [makeIntIndex, makeFloatIndex, makeStringIndex,
16971697
makeUnicodeIndex, makeDateIndex, makePeriodIndex,
1698-
makeTimedeltaIndex, makeBoolIndex,
1698+
makeTimedeltaIndex, makeBoolIndex, makeRangeIndex,
1699+
makeIntervalIndex,
16991700
makeCategoricalIndex]
17001701
for make_index_func in all_make_index_funcs:
17011702
yield make_index_func(k=k)

0 commit comments

Comments
 (0)