Skip to content

Commit 6f4e36a

Browse files
nateyoderjreback
authored andcommitted
API: map() on Index returns an Index, not array
closes #12766 closes #12798 This is a follow on to #12798. Author: Nate Yoder <[email protected]> Closes #14506 from nateyoder/index_map_index and squashes the following commits: 95e4440 [Nate Yoder] fix typo and add ref tag in whatsnew b36e83c [Nate Yoder] update whatsnew, fix documentation 4635e6a [Nate Yoder] compare as index a17ddab [Nate Yoder] Fix unused import and docstrings per pep8radius docformatter; change other uses of assert_index_equal to testing instead os self ab168e7 [Nate Yoder] Update whatsnew and add git PR to tests to denote changes 504c2a2 [Nate Yoder] Fix tests that weren't run by PyCharm 23c133d [Nate Yoder] Update tests to match dtype int64 07b772a [Nate Yoder] use the numpy results if we can to avoid repeating the computation just to create the object a110be9 [Nate Yoder] make map on time tseries indices return index if dtype of output is not a tseries; sphinx changes; fix docstring a596744 [Nate Yoder] introspect results from map so that if the output array has tuples we create a multiindex instead of an index 5fc66c3 [Nate Yoder] make map return an index if it operates on an index, multi index, or categorical index; map on a categorical will either return a categorical or an index (rather than a numpy array)
1 parent 2566223 commit 6f4e36a

File tree

13 files changed

+188
-61
lines changed

13 files changed

+188
-61
lines changed

doc/source/whatsnew/v0.20.0.txt

+70-1
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,77 @@ Other enhancements
9191
Backwards incompatible API changes
9292
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9393

94-
.. _whatsnew_0200.api:
9594

95+
.. _whatsnew.api_breaking.index_map:
96+
97+
Map on Index types now returns other Index types
98+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
99+
100+
- ``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`)
101+
102+
.. ipython:: python
103+
104+
idx = Index([1, 2])
105+
idx
106+
mi = MultiIndex.from_tuples([(1, 2), (2, 4)])
107+
mi
108+
109+
Previous Behavior:
110+
111+
.. code-block:: ipython
112+
113+
In [5]: idx.map(lambda x: x * 2)
114+
Out[5]: array([2, 4])
115+
116+
In [6]: idx.map(lambda x: (x, x * 2))
117+
Out[6]: array([(1, 2), (2, 4)], dtype=object)
118+
119+
In [7]: mi.map(lambda x: x)
120+
Out[7]: array([(1, 2), (2, 4)], dtype=object)
121+
122+
In [8]: mi.map(lambda x: x[0])
123+
Out[8]: array([1, 2])
124+
125+
New Behavior:
126+
127+
.. ipython:: python
128+
129+
idx.map(lambda x: x * 2)
130+
131+
idx.map(lambda x: (x, x * 2))
132+
133+
mi.map(lambda x: x)
134+
135+
mi.map(lambda x: x[0])
136+
137+
138+
- ``map`` on a Series with datetime64 values may return int64 dtypes rather than int32
139+
140+
.. ipython:: python
141+
142+
s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo'))
143+
s
144+
145+
Previous Behavior:
146+
147+
.. code-block:: ipython
148+
149+
In [9]: s.map(lambda x: x.hour)
150+
Out[9]:
151+
0 0
152+
1 1
153+
2 2
154+
dtype: int32
155+
156+
157+
New Behavior:
158+
159+
.. ipython:: python
160+
161+
s.map(lambda x: x.hour)
162+
163+
164+
.. _whatsnew_0200.api:
96165

97166
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
98167
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)

pandas/core/categorical.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -930,8 +930,7 @@ def remove_unused_categories(self, inplace=False):
930930
return cat
931931

932932
def map(self, mapper):
933-
"""
934-
Apply mapper function to its categories (not codes).
933+
"""Apply mapper function to its categories (not codes).
935934
936935
Parameters
937936
----------
@@ -943,7 +942,8 @@ def map(self, mapper):
943942
944943
Returns
945944
-------
946-
applied : Categorical or np.ndarray.
945+
applied : Categorical or Index.
946+
947947
"""
948948
new_categories = self.categories.map(mapper)
949949
try:

pandas/indexes/base.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -2427,8 +2427,7 @@ def groupby(self, values):
24272427
return result
24282428

24292429
def map(self, mapper):
2430-
"""
2431-
Apply mapper function to its values.
2430+
"""Apply mapper function to an index.
24322431
24332432
Parameters
24342433
----------
@@ -2437,9 +2436,21 @@ def map(self, mapper):
24372436
24382437
Returns
24392438
-------
2440-
applied : array
2439+
applied : Union[Index, MultiIndex], inferred
2440+
The output of the mapping function applied to the index.
2441+
If the function returns a tuple with more than one element
2442+
a MultiIndex will be returned.
2443+
24412444
"""
2442-
return self._arrmap(self.values, mapper)
2445+
from .multi import MultiIndex
2446+
mapped_values = self._arrmap(self.values, mapper)
2447+
attributes = self._get_attributes_dict()
2448+
if mapped_values.size and isinstance(mapped_values[0], tuple):
2449+
return MultiIndex.from_tuples(mapped_values,
2450+
names=attributes.get('name'))
2451+
2452+
attributes['copy'] = False
2453+
return Index(mapped_values, **attributes)
24432454

24442455
def isin(self, values, level=None):
24452456
"""

pandas/indexes/category.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -517,22 +517,22 @@ def take(self, indices, axis=0, allow_fill=True,
517517
return self._create_from_codes(taken)
518518

519519
def map(self, mapper):
520-
"""
521-
Apply mapper function to its categories (not codes).
520+
"""Apply mapper function to its categories (not codes).
522521
523522
Parameters
524523
----------
525524
mapper : callable
526525
Function to be applied. When all categories are mapped
527-
to different categories, the result will be Categorical which has
528-
the same order property as the original. Otherwise, the result will
529-
be np.ndarray.
526+
to different categories, the result will be a CategoricalIndex
527+
which has the same order property as the original. Otherwise,
528+
the result will be an Index.
530529
531530
Returns
532531
-------
533-
applied : Categorical or np.ndarray.
532+
applied : CategoricalIndex or Index
533+
534534
"""
535-
return self.values.map(mapper)
535+
return self._shallow_copy_with_infer(self.values.map(mapper))
536536

537537
def delete(self, loc):
538538
"""

pandas/tests/indexes/test_base.py

+52-10
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,48 @@ def test_sub(self):
767767
self.assertRaises(TypeError, lambda: idx - idx.tolist())
768768
self.assertRaises(TypeError, lambda: idx.tolist() - idx)
769769

770+
def test_map_identity_mapping(self):
771+
# GH 12766
772+
for name, cur_index in self.indices.items():
773+
tm.assert_index_equal(cur_index, cur_index.map(lambda x: x))
774+
775+
def test_map_with_tuples(self):
776+
# GH 12766
777+
778+
# Test that returning a single tuple from an Index
779+
# returns an Index.
780+
boolean_index = tm.makeIntIndex(3).map(lambda x: (x,))
781+
expected = Index([(0,), (1,), (2,)])
782+
tm.assert_index_equal(boolean_index, expected)
783+
784+
# Test that returning a tuple from a map of a single index
785+
# returns a MultiIndex object.
786+
boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1))
787+
expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)])
788+
tm.assert_index_equal(boolean_index, expected)
789+
790+
# Test that returning a single object from a MultiIndex
791+
# returns an Index.
792+
first_level = ['foo', 'bar', 'baz']
793+
multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 2, 3]))
794+
reduced_index = multi_index.map(lambda x: x[0])
795+
tm.assert_index_equal(reduced_index, Index(first_level))
796+
797+
def test_map_tseries_indices_return_index(self):
798+
date_index = tm.makeDateIndex(10)
799+
exp = Index([1] * 10)
800+
tm.assert_index_equal(exp, date_index.map(lambda x: 1))
801+
802+
period_index = tm.makePeriodIndex(10)
803+
tm.assert_index_equal(exp, period_index.map(lambda x: 1))
804+
805+
tdelta_index = tm.makeTimedeltaIndex(10)
806+
tm.assert_index_equal(exp, tdelta_index.map(lambda x: 1))
807+
808+
date_index = tm.makeDateIndex(24, freq='h', name='hourly')
809+
exp = Index(range(24), name='hourly')
810+
tm.assert_index_equal(exp, date_index.map(lambda x: x.hour))
811+
770812
def test_append_multiple(self):
771813
index = Index(['a', 'b', 'c', 'd', 'e', 'f'])
772814

@@ -1194,16 +1236,16 @@ def check_slice(in_slice, expected):
11941236
self.assert_index_equal(result, expected)
11951237

11961238
for in_slice, expected in [
1197-
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''),
1198-
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'),
1199-
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'),
1200-
(SLC['y'::-4], 'yb'),
1201-
# absent labels
1202-
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'),
1203-
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'),
1204-
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'),
1205-
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''),
1206-
(SLC['m':'m':-1], '')
1239+
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''),
1240+
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'),
1241+
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'),
1242+
(SLC['y'::-4], 'yb'),
1243+
# absent labels
1244+
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'),
1245+
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'),
1246+
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'),
1247+
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''),
1248+
(SLC['m':'m':-1], '')
12071249
]:
12081250
check_slice(in_slice, expected)
12091251

pandas/tests/indexes/test_category.py

+12-11
Original file line numberDiff line numberDiff line change
@@ -207,19 +207,20 @@ def test_map(self):
207207
ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'),
208208
ordered=True)
209209
result = ci.map(lambda x: x.lower())
210-
exp = pd.Categorical(list('ababc'), categories=list('cba'),
211-
ordered=True)
212-
tm.assert_categorical_equal(result, exp)
210+
exp = pd.CategoricalIndex(list('ababc'), categories=list('cba'),
211+
ordered=True)
212+
tm.assert_index_equal(result, exp)
213213

214214
ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
215215
ordered=False, name='XXX')
216216
result = ci.map(lambda x: x.lower())
217-
exp = pd.Categorical(list('ababc'), categories=list('bac'),
218-
ordered=False)
219-
tm.assert_categorical_equal(result, exp)
217+
exp = pd.CategoricalIndex(list('ababc'), categories=list('bac'),
218+
ordered=False, name='XXX')
219+
tm.assert_index_equal(result, exp)
220220

221-
tm.assert_numpy_array_equal(ci.map(lambda x: 1),
222-
np.array([1] * 5, dtype=np.int64))
221+
# GH 12766: Return an index not an array
222+
tm.assert_index_equal(ci.map(lambda x: 1),
223+
Index(np.array([1] * 5, dtype=np.int64), name='XXX'))
223224

224225
# change categories dtype
225226
ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
@@ -228,9 +229,9 @@ def f(x):
228229
return {'A': 10, 'B': 20, 'C': 30}.get(x)
229230

230231
result = ci.map(f)
231-
exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30],
232-
ordered=False)
233-
tm.assert_categorical_equal(result, exp)
232+
exp = pd.CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30],
233+
ordered=False)
234+
tm.assert_index_equal(result, exp)
234235

235236
def test_where(self):
236237
i = self.create_index()

pandas/tests/series/test_apply.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,9 @@ def test_apply_datetimetz(self):
123123
tm.assert_series_equal(result, exp)
124124

125125
# change dtype
126+
# GH 14506 : Returned dtype changed from int32 to int64
126127
result = s.apply(lambda x: x.hour)
127-
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32)
128+
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
128129
tm.assert_series_equal(result, exp)
129130

130131
# not vectorized
@@ -317,8 +318,9 @@ def test_map_datetimetz(self):
317318
tm.assert_series_equal(result, exp)
318319

319320
# change dtype
321+
# GH 14506 : Returned dtype changed from int32 to int64
320322
result = s.map(lambda x: x.hour)
321-
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32)
323+
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
322324
tm.assert_series_equal(result, exp)
323325

324326
with tm.assertRaises(NotImplementedError):

pandas/tests/test_categorical.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1669,7 +1669,8 @@ def test_map(self):
16691669
tm.assert_categorical_equal(result, exp)
16701670

16711671
result = c.map(lambda x: 1)
1672-
tm.assert_numpy_array_equal(result, np.array([1] * 5, dtype=np.int64))
1672+
# GH 12766: Return an index not an array
1673+
tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
16731674

16741675

16751676
class TestCategoricalAsBlock(tm.TestCase):

pandas/tseries/base.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
from pandas.util.decorators import Appender, cache_readonly
2828
import pandas.types.concat as _concat
2929
import pandas.tseries.frequencies as frequencies
30-
import pandas.algos as _algos
3130

3231

3332
class DatelikeOps(object):
@@ -330,11 +329,16 @@ def _nat_new(self, box=True):
330329
def map(self, f):
331330
try:
332331
result = f(self)
333-
if not isinstance(result, (np.ndarray, Index)):
334-
raise TypeError
332+
333+
# Try to use this result if we can
334+
if isinstance(result, np.ndarray):
335+
self._shallow_copy(result)
336+
337+
if not isinstance(result, Index):
338+
raise TypeError('The map function must return an Index object')
335339
return result
336340
except Exception:
337-
return _algos.arrmap_object(self.asobject.values, f)
341+
return self.asobject.map(f)
338342

339343
def sort_values(self, return_indexer=False, ascending=True):
340344
"""

pandas/tseries/tests/test_converter.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import nose
44

55
import numpy as np
6-
from pandas import Timestamp, Period
6+
from pandas import Timestamp, Period, Index
77
from pandas.compat import u
88
import pandas.util.testing as tm
99
from pandas.tseries.offsets import Second, Milli, Micro
@@ -104,8 +104,8 @@ def test_dateindex_conversion(self):
104104
for freq in ('B', 'L', 'S'):
105105
dateindex = tm.makeDateIndex(k=10, freq=freq)
106106
rs = self.dtc.convert(dateindex, None, None)
107-
xp = converter.dates.date2num(dateindex._mpl_repr())
108-
tm.assert_almost_equal(rs, xp, decimals)
107+
xp = Index(converter.dates.date2num(dateindex._mpl_repr()))
108+
tm.assert_index_equal(rs, xp, decimals)
109109

110110
def test_resolution(self):
111111
def _assert_less(ts1, ts2):

pandas/tseries/tests/test_period.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -3521,8 +3521,8 @@ def test_map(self):
35213521
tm.assert_index_equal(result, expected)
35223522

35233523
result = index.map(lambda x: x.ordinal)
3524-
exp = np.array([x.ordinal for x in index], dtype=np.int64)
3525-
tm.assert_numpy_array_equal(result, exp)
3524+
exp = Index([x.ordinal for x in index])
3525+
tm.assert_index_equal(result, exp)
35263526

35273527
def test_map_with_string_constructor(self):
35283528
raw = [2005, 2007, 2009]
@@ -3534,20 +3534,17 @@ def test_map_with_string_constructor(self):
35343534
types += text_type,
35353535

35363536
for t in types:
3537-
expected = np.array(lmap(t, raw), dtype=object)
3537+
expected = Index(lmap(t, raw))
35383538
res = index.map(t)
35393539

3540-
# should return an array
3541-
tm.assertIsInstance(res, np.ndarray)
3540+
# should return an Index
3541+
tm.assertIsInstance(res, Index)
35423542

35433543
# preserve element types
35443544
self.assertTrue(all(isinstance(resi, t) for resi in res))
35453545

3546-
# dtype should be object
3547-
self.assertEqual(res.dtype, np.dtype('object').type)
3548-
35493546
# lastly, values should compare equal
3550-
tm.assert_numpy_array_equal(res, expected)
3547+
tm.assert_index_equal(res, expected)
35513548

35523549
def test_convert_array_of_periods(self):
35533550
rng = period_range('1/1/2000', periods=20, freq='D')

0 commit comments

Comments
 (0)