Skip to content

Commit b02834b

Browse files
committed
Refactor the code to work with period and timedelta indices
1 parent fd72c4f commit b02834b

File tree

10 files changed

+145
-40
lines changed

10 files changed

+145
-40
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ Other enhancements
113113
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
114114
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
115115
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
116+
- ``Index.map`` can now accept ``Series`` and dictionary input objects
116117

117118
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
118119

pandas/core/series.py

+2-16
Original file line numberDiff line numberDiff line change
@@ -184,23 +184,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
184184
index = Index(data)
185185
else:
186186
index = Index(_try_sort(data))
187+
187188
try:
188-
if isinstance(index, DatetimeIndex):
189-
if len(data):
190-
# coerce back to datetime objects for lookup
191-
data = _dict_compat(data)
192-
data = lib.fast_multiget(data,
193-
index.asobject.values,
194-
default=np.nan)
195-
else:
196-
data = np.nan
197-
# GH #12169
198-
elif isinstance(index, (PeriodIndex, TimedeltaIndex)):
199-
data = ([data.get(i, nan) for i in index]
200-
if data else np.nan)
201-
else:
202-
data = lib.fast_multiget(data, index.values,
203-
default=np.nan)
189+
data = index.get_values_from_dict(data)
204190
except TypeError:
205191
data = ([data.get(i, nan) for i in index]
206192
if data else np.nan)

pandas/indexes/base.py

+28-6
Original file line numberDiff line numberDiff line change
@@ -2394,6 +2394,25 @@ def get_indexer_for(self, target, **kwargs):
23942394
indexer, _ = self.get_indexer_non_unique(target, **kwargs)
23952395
return indexer
23962396

2397+
def get_values_from_dict(self, input_dict):
2398+
"""Return the values of the input dictionary in the order the keys are
2399+
in the index. np.nan is returned for index values not in the
2400+
dictionary.
2401+
2402+
Parameters
2403+
----------
2404+
input_dict : dict
2405+
The dictionary from which to extract the values
2406+
2407+
Returns
2408+
-------
2409+
Union[np.array, list]
2410+
2411+
"""
2412+
2413+
return lib.fast_multiget(input_dict, self.values,
2414+
default=np.nan)
2415+
23972416
def _possibly_promote(self, other):
23982417
# A hack, but it works
23992418
from pandas.tseries.index import DatetimeIndex
@@ -2437,8 +2456,8 @@ def map(self, mapper):
24372456
24382457
Parameters
24392458
----------
2440-
mapper : function, dict, or Series
2441-
Function to be applied.
2459+
mapper : Union[function, dict, Series]
2460+
Function to be applied or input correspondence object.
24422461
24432462
Returns
24442463
-------
@@ -2451,12 +2470,15 @@ def map(self, mapper):
24512470
from .multi import MultiIndex
24522471

24532472
if isinstance(mapper, ABCSeries):
2454-
indexer = mapper.index.get_indexer(self._values)
2473+
indexer = mapper.index.get_indexer(self.values)
24552474
mapped_values = algos.take_1d(mapper.values, indexer)
2475+
elif isinstance(mapper, dict):
2476+
idx = Index(mapper.keys())
2477+
data = idx.get_values_from_dict(mapper)
2478+
indexer = idx.get_indexer(self.values)
2479+
mapped_values = algos.take_1d(data, indexer)
24562480
else:
2457-
if isinstance(mapper, dict):
2458-
mapper = mapper.get
2459-
mapped_values = self._arrmap(self._values, mapper)
2481+
mapped_values = self._arrmap(self.values, mapper)
24602482

24612483
attributes = self._get_attributes_dict()
24622484
if mapped_values.size and isinstance(mapped_values[0], tuple):

pandas/tests/indexes/test_base.py

+31-14
Original file line numberDiff line numberDiff line change
@@ -809,10 +809,10 @@ def test_map_tseries_indices_return_index(self):
809809
exp = Index(range(24), name='hourly')
810810
tm.assert_index_equal(exp, date_index.map(lambda x: x.hour))
811811

812-
def test_map_with_series_all_indices(self):
812+
def test_map_with_dict_and_series(self):
813813
expected = Index(['foo', 'bar', 'baz'])
814814
mapper = Series(expected.values, index=[0, 1, 2])
815-
self.assert_index_equal(tm.makeIntIndex(3).map(mapper), expected)
815+
tm.assert_index_equal(tm.makeIntIndex(3).map(mapper), expected)
816816

817817
# GH 12766
818818
# special = []
@@ -822,41 +822,58 @@ def test_map_with_series_all_indices(self):
822822
orig_values = ['a', 'B', 1, 'a']
823823
new_values = ['one', 2, 3.0, 'one']
824824
cur_index = CategoricalIndex(orig_values, name='XXX')
825+
expected = CategoricalIndex(new_values,
826+
name='XXX', categories=[3.0, 2, 'one'])
827+
825828
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
826-
expected = CategoricalIndex(new_values, name='XXX')
827829
output = cur_index.map(mapper)
828-
self.assert_numpy_array_equal(expected.values.get_values(), output.values.get_values())
829-
self.assert_equal(expected.name, output.name)
830+
# Order of categories in output can be different
831+
tm.assert_index_equal(expected, output)
832+
833+
mapper = {o: n for o, n in
834+
zip(orig_values[:-1], new_values[:-1])}
835+
output = cur_index.map(mapper)
836+
# Order of categories in output can be different
837+
tm.assert_index_equal(expected, output)
830838

831839
for name in list(set(self.indices.keys()) - set(special)):
832840
cur_index = self.indices[name]
833841
expected = Index(np.arange(len(cur_index), 0, -1))
834-
mapper = pd.Series(expected.values, index=cur_index)
835-
print(name)
836-
output = cur_index.map(mapper)
837-
self.assert_index_equal(expected, cur_index.map(mapper))
842+
mapper = pd.Series(expected, index=cur_index)
843+
tm.assert_index_equal(expected, cur_index.map(mapper))
844+
845+
mapper = {o: n for o, n in
846+
zip(cur_index, expected)}
847+
if mapper:
848+
tm.assert_index_equal(expected, cur_index.map(mapper))
849+
else:
850+
# The expected index type is Int64Index
851+
# but the output defaults to Float64
852+
tm.assert_index_equal(Float64Index([]),
853+
cur_index.map(mapper))
838854

839855
def test_map_with_categorical_series(self):
840856
# GH 12756
841857
a = Index([1, 2, 3, 4])
842-
b = Series(["even", "odd", "even", "odd"], dtype="category")
858+
b = Series(["even", "odd", "even", "odd"],
859+
dtype="category")
843860
c = Series(["even", "odd", "even", "odd"])
844861

845862
exp = CategoricalIndex(["odd", "even", "odd", np.nan])
846-
self.assert_index_equal(a.map(b), exp)
863+
tm.assert_index_equal(a.map(b), exp)
847864
exp = Index(["odd", "even", "odd", np.nan])
848-
self.assert_index_equal(a.map(c), exp)
865+
tm.assert_index_equal(a.map(c), exp)
849866

850867
def test_map_with_non_function_missing_values(self):
851868
# GH 12756
852869
expected = Index([2., np.nan, 'foo'])
853870
input = Index([2, 1, 0])
854871

855872
mapper = Series(['foo', 2., 'baz'], index=[0, 2, -1])
856-
self.assert_index_equal(expected, input.map(mapper))
873+
tm.assert_index_equal(expected, input.map(mapper))
857874

858875
mapper = {0: 'foo', 2: 2.0, -1: 'baz'}
859-
self.assert_index_equal(expected, input.map(mapper))
876+
tm.assert_index_equal(expected, input.map(mapper))
860877

861878
def test_append_multiple(self):
862879
index = Index(['a', 'b', 'c', 'd', 'e', 'f'])

pandas/tseries/index.py

+24
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,30 @@ def get_value_maybe_box(self, series, key):
13801380
key, tz=self.tz)
13811381
return _maybe_box(self, values, series, key)
13821382

1383+
def get_values_from_dict(self, input_dict):
1384+
"""Return the values of the input dictionary in the order the keys are
1385+
in the index. np.nan is returned for index values not in the
1386+
dictionary.
1387+
1388+
Parameters
1389+
----------
1390+
input_dict : dict
1391+
The dictionary from which to extract the values
1392+
1393+
Returns
1394+
-------
1395+
Union[np.array, list]
1396+
1397+
"""
1398+
if len(input_dict):
1399+
# coerce back to datetime objects for lookup
1400+
input_dict = com._dict_compat(input_dict)
1401+
return lib.fast_multiget(input_dict,
1402+
self.asobject.values,
1403+
default=np.nan)
1404+
else:
1405+
return np.nan
1406+
13831407
def get_loc(self, key, method=None, tolerance=None):
13841408
"""
13851409
Get integer location for requested label

pandas/tseries/period.py

+19
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,25 @@ def _get_unique_index(self, dropna=False):
783783
res = res.dropna()
784784
return res
785785

786+
def get_values_from_dict(self, input_dict):
787+
"""Return the values of the input dictionary in the order the keys are
788+
in the index. np.nan is returned for index values not in the
789+
dictionary.
790+
791+
Parameters
792+
----------
793+
input_dict : dict
794+
The dictionary from which to extract the values
795+
796+
Returns
797+
-------
798+
Union[np.array, list]
799+
800+
"""
801+
802+
return np.array([input_dict.get(i, np.nan) for i in self.values]
803+
if input_dict else [np.nan])
804+
786805
def get_loc(self, key, method=None, tolerance=None):
787806
"""
788807
Get integer location for requested label

pandas/tseries/tdi.py

+20
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,26 @@ def get_value_maybe_box(self, series, key):
665665
values = self._engine.get_value(_values_from_object(series), key)
666666
return _maybe_box(self, values, series, key)
667667

668+
def get_values_from_dict(self, input_dict):
669+
"""Return the values of the input dictionary in the order the keys are
670+
in the index. np.nan is returned for index values not in the
671+
dictionary.
672+
673+
Parameters
674+
----------
675+
input_dict : dict
676+
The dictionary from which to extract the values
677+
678+
Returns
679+
-------
680+
Union[np.array, list]
681+
682+
"""
683+
684+
return np.array([input_dict.get(i, np.nan)
685+
for i in self.asobject.values]
686+
if input_dict else [np.nan])
687+
668688
def get_loc(self, key, method=None, tolerance=None):
669689
"""
670690
Get integer location for requested label

pandas/tseries/tests/test_period.py

+4
Original file line numberDiff line numberDiff line change
@@ -3511,6 +3511,10 @@ def test_map(self):
35113511
result = index.map(lambda x: x + 1)
35123512
expected = index + 1
35133513
tm.assert_index_equal(result, expected)
3514+
series_map = pd.Series(expected, index)
3515+
tm.assert_index_equal(index.map(series_map), expected)
3516+
dict_map = {i: e for e, i in zip(expected, index)}
3517+
tm.assert_index_equal(index.map(dict_map), expected)
35143518

35153519
result = index.map(lambda x: x.ordinal)
35163520
exp = Index([x.ordinal for x in index])

pandas/tseries/tests/test_timedeltas.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1530,14 +1530,19 @@ def test_ops_error_str(self):
15301530
l != r
15311531

15321532
def test_map(self):
1533-
15341533
rng = timedelta_range('1 day', periods=10)
15351534

15361535
f = lambda x: x.days
15371536
result = rng.map(f)
15381537
exp = Int64Index([f(x) for x in rng])
15391538
tm.assert_index_equal(result, exp)
15401539

1540+
s = Series(exp, index=rng)
1541+
tm.assert_index_equal(rng.map(s), exp)
1542+
1543+
d = {r: v for r, v in zip(rng, exp)}
1544+
tm.assert_index_equal(rng.map(d), exp)
1545+
15411546
def test_misc_coverage(self):
15421547

15431548
rng = timedelta_range('1 day', periods=5)

pandas/tseries/tests/test_timeseries.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -3012,9 +3012,16 @@ def test_map(self):
30123012
rng = date_range('1/1/2000', periods=10)
30133013

30143014
f = lambda x: x.strftime('%Y%m%d')
3015-
result = rng.map(f)
3016-
exp = Index([f(x) for x in rng], dtype='<U8')
3017-
tm.assert_index_equal(result, exp)
3015+
exp_values = [f(x) for x in rng]
3016+
exp = Index(exp_values, dtype='<U8')
3017+
3018+
tm.assert_index_equal(rng.map(f), exp)
3019+
3020+
mapper = {o: n for o, n in zip(rng, exp_values)}
3021+
tm.assert_index_equal(rng.map(mapper), exp)
3022+
3023+
mapper = Series(exp_values, index=rng)
3024+
tm.assert_index_equal(rng.map(mapper), exp)
30183025

30193026
def test_iteration_preserves_tz(self):
30203027

0 commit comments

Comments
 (0)