Skip to content

Commit c4fdd71

Browse files
committed
API: empty map should not infer
closes #18509
1 parent 262e8ff commit c4fdd71

File tree

8 files changed

+42
-37
lines changed

8 files changed

+42
-37
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Other API Changes
7575
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
7676
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
7777
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
78-
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`).
78+
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`).
7979
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
8080
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
8181
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)

pandas/core/indexes/base.py

+1-17
Original file line numberDiff line numberDiff line change
@@ -2925,25 +2925,9 @@ def map(self, mapper, na_action=None):
29252925
names=names)
29262926

29272927
attributes['copy'] = False
2928-
2929-
# we want to try to return our original dtype
2930-
# ints infer to integer, but if we have
2931-
# uints, would prefer to return these
2932-
if is_unsigned_integer_dtype(self.dtype):
2933-
inferred = lib.infer_dtype(new_values)
2934-
if inferred == 'integer':
2935-
attributes['dtype'] = self.dtype
2936-
2937-
elif not new_values.size:
2928+
if not new_values.size:
29382929
# empty
29392930
attributes['dtype'] = self.dtype
2940-
elif isna(new_values).all():
2941-
# all nan
2942-
inferred = lib.infer_dtype(self)
2943-
if inferred in ['datetime', 'datetime64',
2944-
'timedelta', 'timedelta64',
2945-
'period']:
2946-
new_values = [libts.NaT] * len(new_values)
29472931

29482932
return Index(new_values, **attributes)
29492933

pandas/tests/indexes/common.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,13 @@ def test_searchsorted_monotonic(self, indices):
10091009
def test_map(self):
10101010
# callable
10111011
index = self.create_index()
1012-
expected = index
1012+
1013+
# we don't infer UInt64
1014+
if isinstance(index, pd.UInt64Index):
1015+
expected = index.astype('int64')
1016+
else:
1017+
expected = index
1018+
10131019
result = index.map(lambda x: x)
10141020
tm.assert_index_equal(result, expected)
10151021

@@ -1024,9 +1030,14 @@ def test_map_dictlike(self, mapper):
10241030
if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
10251031
pytest.skip("skipping tests for {}".format(type(index)))
10261032

1027-
expected = index
1028-
10291033
identity = mapper(index.values, index)
1034+
1035+
# we don't infer to UInt64 for a dict
1036+
if isinstance(index, pd.UInt64Index) and isinstance(identity, dict):
1037+
expected = index.astype('int64')
1038+
else:
1039+
expected = index
1040+
10301041
result = index.map(identity)
10311042
tm.assert_index_equal(result, expected)
10321043

pandas/tests/indexes/datetimelike.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" generic datetimelike tests """
22
import pytest
3+
import numpy as np
34
import pandas as pd
45
from .common import Base
56
import pandas.util.testing as tm
@@ -72,6 +73,6 @@ def test_map_dictlike(self, mapper):
7273

7374
# empty map; these map to np.nan because we cannot know
7475
# to re-infer things
75-
expected = pd.Index([pd.NaT] * len(self.index))
76+
expected = pd.Index([np.nan] * len(self.index))
7677
result = self.index.map(mapper([], []))
7778
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -875,9 +875,7 @@ def test_map_dictlike(self, mapper):
875875
expected = Index(np.arange(len(index), 0, -1))
876876

877877
# to match proper result coercion for uints
878-
if name == 'uintIndex':
879-
expected = expected.astype('uint64')
880-
elif name == 'empty':
878+
if name == 'empty':
881879
expected = Index([])
882880

883881
result = index.map(mapper(expected, index))

pandas/tests/series/test_apply.py

+8
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,14 @@ def test_map(self):
377377
exp = Series([np.nan, 'B', 'C', 'D'])
378378
tm.assert_series_equal(a.map(c), exp)
379379

380+
@pytest.mark.parametrize("index", tm.all_index_generator(10))
381+
def test_map_empty(self, index):
382+
s = Series(index)
383+
result = s.map({})
384+
385+
expected = pd.Series(np.nan, index=s.index)
386+
tm.assert_series_equal(result, expected)
387+
380388
def test_map_compat(self):
381389
# related GH 8024
382390
s = Series([True, True, False], index=[1, 2, 3])

pandas/tests/test_resample.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -816,21 +816,23 @@ def test_resample_empty_dataframe(self):
816816

817817
# test size for GH13212 (currently stays as df)
818818

819-
def test_resample_empty_dtypes(self):
819+
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
820+
@pytest.mark.parametrize(
821+
"dtype",
822+
[np.float, np.int, np.object, 'datetime64[ns]'])
823+
def test_resample_empty_dtypes(self, index, dtype):
820824

821825
# Empty series were sometimes causing a segfault (for the functions
822826
# with Cython bounds-checking disabled) or an IndexError. We just run
823827
# them to ensure they no longer do. (GH #10228)
824-
for index in tm.all_timeseries_index_generator(0):
825-
for dtype in (np.float, np.int, np.object, 'datetime64[ns]'):
826-
for how in downsample_methods + upsample_methods:
827-
empty_series = Series([], index, dtype)
828-
try:
829-
getattr(empty_series.resample('d'), how)()
830-
except DataError:
831-
# Ignore these since some combinations are invalid
832-
# (ex: doing mean with dtype of np.object)
833-
pass
828+
for how in downsample_methods + upsample_methods:
829+
empty_series = Series([], index, dtype)
830+
try:
831+
getattr(empty_series.resample('d'), how)()
832+
except DataError:
833+
# Ignore these since some combinations are invalid
834+
# (ex: doing mean with dtype of np.object)
835+
pass
834836

835837
def test_resample_loffset_arg_type(self):
836838
# GH 13218, 15002

pandas/util/testing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1702,7 +1702,8 @@ def all_index_generator(k=10):
17021702
"""
17031703
all_make_index_funcs = [makeIntIndex, makeFloatIndex, makeStringIndex,
17041704
makeUnicodeIndex, makeDateIndex, makePeriodIndex,
1705-
makeTimedeltaIndex, makeBoolIndex,
1705+
makeTimedeltaIndex, makeBoolIndex, makeRangeIndex,
1706+
makeIntervalIndex,
17061707
makeCategoricalIndex]
17071708
for make_index_func in all_make_index_funcs:
17081709
yield make_index_func(k=k)

0 commit comments

Comments
 (0)