Skip to content

Commit 527f879

Browse files
committed
Refactor the code to work with period and timedelta indices
1 parent 5b1ca5b commit 527f879

File tree

10 files changed

+146
-40
lines changed

10 files changed

+146
-40
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ Other enhancements
139139
- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
140140
- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
141141
- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
142+
- ``Index.map`` can now accept ``Series`` and dictionary input objects.
143+
142144

143145
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
144146

pandas/core/series.py

+2-16
Original file line numberDiff line numberDiff line change
@@ -184,23 +184,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
184184
index = Index(data)
185185
else:
186186
index = Index(_try_sort(data))
187+
187188
try:
188-
if isinstance(index, DatetimeIndex):
189-
if len(data):
190-
# coerce back to datetime objects for lookup
191-
data = _dict_compat(data)
192-
data = lib.fast_multiget(data,
193-
index.asobject.values,
194-
default=np.nan)
195-
else:
196-
data = np.nan
197-
# GH #12169
198-
elif isinstance(index, (PeriodIndex, TimedeltaIndex)):
199-
data = ([data.get(i, nan) for i in index]
200-
if data else np.nan)
201-
else:
202-
data = lib.fast_multiget(data, index.values,
203-
default=np.nan)
189+
data = index.get_values_from_dict(data)
204190
except TypeError:
205191
data = ([data.get(i, nan) for i in index]
206192
if data else np.nan)

pandas/indexes/base.py

+28-6
Original file line numberDiff line numberDiff line change
@@ -2448,6 +2448,25 @@ def get_indexer_for(self, target, **kwargs):
24482448
indexer, _ = self.get_indexer_non_unique(target, **kwargs)
24492449
return indexer
24502450

2451+
def get_values_from_dict(self, input_dict):
2452+
"""Return the values of the input dictionary in the order the keys are
2453+
in the index. np.nan is returned for index values not in the
2454+
dictionary.
2455+
2456+
Parameters
2457+
----------
2458+
input_dict : dict
2459+
The dictionary from which to extract the values
2460+
2461+
Returns
2462+
-------
2463+
Union[np.array, list]
2464+
2465+
"""
2466+
2467+
return lib.fast_multiget(input_dict, self.values,
2468+
default=np.nan)
2469+
24512470
def _possibly_promote(self, other):
24522471
# A hack, but it works
24532472
from pandas.tseries.index import DatetimeIndex
@@ -2491,8 +2510,8 @@ def map(self, mapper):
24912510
24922511
Parameters
24932512
----------
2494-
mapper : function, dict, or Series
2495-
Function to be applied.
2513+
mapper : Union[function, dict, Series]
2514+
Function to be applied or input correspondence object.
24962515
24972516
Returns
24982517
-------
@@ -2505,12 +2524,15 @@ def map(self, mapper):
25052524
from .multi import MultiIndex
25062525

25072526
if isinstance(mapper, ABCSeries):
2508-
indexer = mapper.index.get_indexer(self._values)
2527+
indexer = mapper.index.get_indexer(self.values)
25092528
mapped_values = algos.take_1d(mapper.values, indexer)
2529+
elif isinstance(mapper, dict):
2530+
idx = Index(mapper.keys())
2531+
data = idx.get_values_from_dict(mapper)
2532+
indexer = idx.get_indexer(self.values)
2533+
mapped_values = algos.take_1d(data, indexer)
25102534
else:
2511-
if isinstance(mapper, dict):
2512-
mapper = mapper.get
2513-
mapped_values = self._arrmap(self._values, mapper)
2535+
mapped_values = self._arrmap(self.values, mapper)
25142536

25152537
attributes = self._get_attributes_dict()
25162538
if mapped_values.size and isinstance(mapped_values[0], tuple):

pandas/tests/indexes/test_base.py

+31-14
Original file line numberDiff line numberDiff line change
@@ -811,10 +811,10 @@ def test_map_tseries_indices_return_index(self):
811811
exp = Index(range(24), name='hourly')
812812
tm.assert_index_equal(exp, date_index.map(lambda x: x.hour))
813813

814-
def test_map_with_series_all_indices(self):
814+
def test_map_with_dict_and_series(self):
815815
expected = Index(['foo', 'bar', 'baz'])
816816
mapper = Series(expected.values, index=[0, 1, 2])
817-
self.assert_index_equal(tm.makeIntIndex(3).map(mapper), expected)
817+
tm.assert_index_equal(tm.makeIntIndex(3).map(mapper), expected)
818818

819819
# GH 12766
820820
# special = []
@@ -824,41 +824,58 @@ def test_map_with_series_all_indices(self):
824824
orig_values = ['a', 'B', 1, 'a']
825825
new_values = ['one', 2, 3.0, 'one']
826826
cur_index = CategoricalIndex(orig_values, name='XXX')
827+
expected = CategoricalIndex(new_values,
828+
name='XXX', categories=[3.0, 2, 'one'])
829+
827830
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
828-
expected = CategoricalIndex(new_values, name='XXX')
829831
output = cur_index.map(mapper)
830-
self.assert_numpy_array_equal(expected.values.get_values(), output.values.get_values())
831-
self.assert_equal(expected.name, output.name)
832+
# Order of categories in output can be different
833+
tm.assert_index_equal(expected, output)
834+
835+
mapper = {o: n for o, n in
836+
zip(orig_values[:-1], new_values[:-1])}
837+
output = cur_index.map(mapper)
838+
# Order of categories in output can be different
839+
tm.assert_index_equal(expected, output)
832840

833841
for name in list(set(self.indices.keys()) - set(special)):
834842
cur_index = self.indices[name]
835843
expected = Index(np.arange(len(cur_index), 0, -1))
836-
mapper = pd.Series(expected.values, index=cur_index)
837-
print(name)
838-
output = cur_index.map(mapper)
839-
self.assert_index_equal(expected, cur_index.map(mapper))
844+
mapper = pd.Series(expected, index=cur_index)
845+
tm.assert_index_equal(expected, cur_index.map(mapper))
846+
847+
mapper = {o: n for o, n in
848+
zip(cur_index, expected)}
849+
if mapper:
850+
tm.assert_index_equal(expected, cur_index.map(mapper))
851+
else:
852+
# The expected index type is Int64Index
853+
# but the output defaults to Float64
854+
tm.assert_index_equal(Float64Index([]),
855+
cur_index.map(mapper))
840856

841857
def test_map_with_categorical_series(self):
842858
# GH 12756
843859
a = Index([1, 2, 3, 4])
844-
b = Series(["even", "odd", "even", "odd"], dtype="category")
860+
b = Series(["even", "odd", "even", "odd"],
861+
dtype="category")
845862
c = Series(["even", "odd", "even", "odd"])
846863

847864
exp = CategoricalIndex(["odd", "even", "odd", np.nan])
848-
self.assert_index_equal(a.map(b), exp)
865+
tm.assert_index_equal(a.map(b), exp)
849866
exp = Index(["odd", "even", "odd", np.nan])
850-
self.assert_index_equal(a.map(c), exp)
867+
tm.assert_index_equal(a.map(c), exp)
851868

852869
def test_map_with_non_function_missing_values(self):
853870
# GH 12756
854871
expected = Index([2., np.nan, 'foo'])
855872
input = Index([2, 1, 0])
856873

857874
mapper = Series(['foo', 2., 'baz'], index=[0, 2, -1])
858-
self.assert_index_equal(expected, input.map(mapper))
875+
tm.assert_index_equal(expected, input.map(mapper))
859876

860877
mapper = {0: 'foo', 2: 2.0, -1: 'baz'}
861-
self.assert_index_equal(expected, input.map(mapper))
878+
tm.assert_index_equal(expected, input.map(mapper))
862879

863880
def test_append_multiple(self):
864881
index = Index(['a', 'b', 'c', 'd', 'e', 'f'])

pandas/tseries/index.py

+24
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,30 @@ def get_value_maybe_box(self, series, key):
13801380
key, tz=self.tz)
13811381
return _maybe_box(self, values, series, key)
13821382

1383+
def get_values_from_dict(self, input_dict):
1384+
"""Return the values of the input dictionary in the order the keys are
1385+
in the index. np.nan is returned for index values not in the
1386+
dictionary.
1387+
1388+
Parameters
1389+
----------
1390+
input_dict : dict
1391+
The dictionary from which to extract the values
1392+
1393+
Returns
1394+
-------
1395+
Union[np.array, list]
1396+
1397+
"""
1398+
if len(input_dict):
1399+
# coerce back to datetime objects for lookup
1400+
input_dict = com._dict_compat(input_dict)
1401+
return lib.fast_multiget(input_dict,
1402+
self.asobject.values,
1403+
default=np.nan)
1404+
else:
1405+
return np.nan
1406+
13831407
def get_loc(self, key, method=None, tolerance=None):
13841408
"""
13851409
Get integer location for requested label

pandas/tseries/period.py

+19
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,25 @@ def _get_unique_index(self, dropna=False):
783783
res = res.dropna()
784784
return res
785785

786+
def get_values_from_dict(self, input_dict):
787+
"""Return the values of the input dictionary in the order the keys are
788+
in the index. np.nan is returned for index values not in the
789+
dictionary.
790+
791+
Parameters
792+
----------
793+
input_dict : dict
794+
The dictionary from which to extract the values
795+
796+
Returns
797+
-------
798+
Union[np.array, list]
799+
800+
"""
801+
802+
return np.array([input_dict.get(i, np.nan) for i in self.values]
803+
if input_dict else [np.nan])
804+
786805
def get_loc(self, key, method=None, tolerance=None):
787806
"""
788807
Get integer location for requested label

pandas/tseries/tdi.py

+20
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,26 @@ def get_value_maybe_box(self, series, key):
665665
values = self._engine.get_value(_values_from_object(series), key)
666666
return _maybe_box(self, values, series, key)
667667

668+
def get_values_from_dict(self, input_dict):
669+
"""Return the values of the input dictionary in the order the keys are
670+
in the index. np.nan is returned for index values not in the
671+
dictionary.
672+
673+
Parameters
674+
----------
675+
input_dict : dict
676+
The dictionary from which to extract the values
677+
678+
Returns
679+
-------
680+
Union[np.array, list]
681+
682+
"""
683+
684+
return np.array([input_dict.get(i, np.nan)
685+
for i in self.asobject.values]
686+
if input_dict else [np.nan])
687+
668688
def get_loc(self, key, method=None, tolerance=None):
669689
"""
670690
Get integer location for requested label

pandas/tseries/tests/test_period.py

+4
Original file line numberDiff line numberDiff line change
@@ -3511,6 +3511,10 @@ def test_map(self):
35113511
result = index.map(lambda x: x + 1)
35123512
expected = index + 1
35133513
tm.assert_index_equal(result, expected)
3514+
series_map = pd.Series(expected, index)
3515+
tm.assert_index_equal(index.map(series_map), expected)
3516+
dict_map = {i: e for e, i in zip(expected, index)}
3517+
tm.assert_index_equal(index.map(dict_map), expected)
35143518

35153519
result = index.map(lambda x: x.ordinal)
35163520
exp = Index([x.ordinal for x in index])

pandas/tseries/tests/test_timedeltas.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1574,14 +1574,19 @@ def test_ops_error_str(self):
15741574
l != r
15751575

15761576
def test_map(self):
1577-
15781577
rng = timedelta_range('1 day', periods=10)
15791578

15801579
f = lambda x: x.days
15811580
result = rng.map(f)
15821581
exp = Int64Index([f(x) for x in rng])
15831582
tm.assert_index_equal(result, exp)
15841583

1584+
s = Series(exp, index=rng)
1585+
tm.assert_index_equal(rng.map(s), exp)
1586+
1587+
d = {r: v for r, v in zip(rng, exp)}
1588+
tm.assert_index_equal(rng.map(d), exp)
1589+
15851590
def test_misc_coverage(self):
15861591

15871592
rng = timedelta_range('1 day', periods=5)

pandas/tseries/tests/test_timeseries.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -3012,9 +3012,16 @@ def test_map(self):
30123012
rng = date_range('1/1/2000', periods=10)
30133013

30143014
f = lambda x: x.strftime('%Y%m%d')
3015-
result = rng.map(f)
3016-
exp = Index([f(x) for x in rng], dtype='<U8')
3017-
tm.assert_index_equal(result, exp)
3015+
exp_values = [f(x) for x in rng]
3016+
exp = Index(exp_values, dtype='<U8')
3017+
3018+
tm.assert_index_equal(rng.map(f), exp)
3019+
3020+
mapper = {o: n for o, n in zip(rng, exp_values)}
3021+
tm.assert_index_equal(rng.map(mapper), exp)
3022+
3023+
mapper = Series(exp_values, index=rng)
3024+
tm.assert_index_equal(rng.map(mapper), exp)
30183025

30193026
def test_iteration_preserves_tz(self):
30203027

0 commit comments

Comments
 (0)