-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
API: map() on Index returns an Index, not array #14506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5fc66c3
a596744
a110be9
07b772a
23c133d
504c2a2
ab168e7
a17ddab
4635e6a
b36e83c
95e4440
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,8 +91,77 @@ Other enhancements | |
Backwards incompatible API changes | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
|
||
.. _whatsnew_0200.api: | ||
|
||
.. _whatsnew.api_breaking.index_map: | ||
|
||
Map on Index types now returns other Index types | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
- ``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can skip this example (the CategoricalIndex) and below |
||
.. ipython:: python | ||
|
||
idx = Index([1, 2]) | ||
idx | ||
mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) | ||
mi | ||
|
||
Previous Behavior: | ||
|
||
.. code-block:: ipython | ||
|
||
In [5]: idx.map(lambda x: x * 2) | ||
Out[5]: array([2, 4]) | ||
|
||
In [6]: idx.map(lambda x: (x, x * 2)) | ||
Out[6]: array([(1, 2), (2, 4)], dtype=object) | ||
|
||
In [7]: mi.map(lambda x: x) | ||
Out[7]: array([(1, 2), (2, 4)], dtype=object) | ||
|
||
In [8]: mi.map(lambda x: x[0]) | ||
Out[8]: array([1, 2]) | ||
|
||
New Behavior: | ||
|
||
.. ipython:: python | ||
|
||
idx.map(lambda x: x * 2) | ||
|
||
idx.map(lambda x: (x, x * 2)) | ||
|
||
mi.map(lambda x: x) | ||
|
||
mi.map(lambda x: x[0]) | ||
|
||
|
||
- ``map`` on a Series with datetime64 values may return int64 dtypes rather than int32 | ||
|
||
.. ipython:: python | ||
|
||
s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo')) | ||
s | ||
|
||
Previous Behavior: | ||
|
||
.. code-block:: ipython | ||
|
||
In [9]: s.map(lambda x: x.hour) | ||
Out[9]: | ||
0 0 | ||
1 1 | ||
2 2 | ||
dtype: int32 | ||
|
||
|
||
New Behavior: | ||
|
||
.. ipython:: python | ||
|
||
s.map(lambda x: x.hour) | ||
|
||
|
||
.. _whatsnew_0200.api: | ||
|
||
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv``; the old name ``CParserError`` will be removed in the future (:issue:`12665`) | ||
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -930,8 +930,7 @@ def remove_unused_categories(self, inplace=False): | |
return cat | ||
|
||
def map(self, mapper): | ||
""" | ||
Apply mapper function to its categories (not codes). | ||
"""Apply mapper function to its categories (not codes). | ||
|
||
Parameters | ||
---------- | ||
|
@@ -943,7 +942,8 @@ def map(self, mapper): | |
|
||
Returns | ||
------- | ||
applied : Categorical or np.ndarray. | ||
applied : Categorical or Index. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually this is ok There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
||
""" | ||
new_categories = self.categories.map(mapper) | ||
try: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2427,8 +2427,7 @@ def groupby(self, values): | |
return result | ||
|
||
def map(self, mapper): | ||
""" | ||
Apply mapper function to its values. | ||
"""Apply mapper function to an index. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -2437,9 +2436,21 @@ def map(self, mapper): | |
|
||
Returns | ||
------- | ||
applied : array | ||
applied : Union[Index, MultiIndex], inferred | ||
The output of the mapping function applied to the index. | ||
If the function returns a tuple with more than one element, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. say the output Index type will be inferred |
||
a MultiIndex will be returned. | ||
|
||
""" | ||
return self._arrmap(self.values, mapper) | ||
from .multi import MultiIndex | ||
mapped_values = self._arrmap(self.values, mapper) | ||
attributes = self._get_attributes_dict() | ||
if mapped_values.size and isinstance(mapped_values[0], tuple): | ||
return MultiIndex.from_tuples(mapped_values, | ||
names=attributes.get('name')) | ||
|
||
attributes['copy'] = False | ||
return Index(mapped_values, **attributes) | ||
|
||
def isin(self, values, level=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -767,6 +767,48 @@ def test_sub(self): | |
self.assertRaises(TypeError, lambda: idx - idx.tolist()) | ||
self.assertRaises(TypeError, lambda: idx.tolist() - idx) | ||
|
||
def test_map_identity_mapping(self): | ||
# GH 12766 | ||
for name, cur_index in self.indices.items(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a comment with the appropriate github issue numbers (don't go crazy but where appropriate) |
||
tm.assert_index_equal(cur_index, cur_index.map(lambda x: x)) | ||
|
||
def test_map_with_tuples(self): | ||
# GH 12766 | ||
|
||
# Test that returning a single tuple from an Index | ||
# returns an Index. | ||
boolean_index = tm.makeIntIndex(3).map(lambda x: (x,)) | ||
expected = Index([(0,), (1,), (2,)]) | ||
tm.assert_index_equal(boolean_index, expected) | ||
|
||
# Test that returning a tuple from a map of a single index | ||
# returns a MultiIndex object. | ||
boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) | ||
expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)]) | ||
tm.assert_index_equal(boolean_index, expected) | ||
|
||
# Test that returning a single object from a MultiIndex | ||
# returns an Index. | ||
first_level = ['foo', 'bar', 'baz'] | ||
multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 2, 3])) | ||
reduced_index = multi_index.map(lambda x: x[0]) | ||
tm.assert_index_equal(reduced_index, Index(first_level)) | ||
|
||
def test_map_tseries_indices_return_index(self): | ||
date_index = tm.makeDateIndex(10) | ||
exp = Index([1] * 10) | ||
tm.assert_index_equal(exp, date_index.map(lambda x: 1)) | ||
|
||
period_index = tm.makePeriodIndex(10) | ||
tm.assert_index_equal(exp, period_index.map(lambda x: 1)) | ||
|
||
tdelta_index = tm.makeTimedeltaIndex(10) | ||
tm.assert_index_equal(exp, tdelta_index.map(lambda x: 1)) | ||
|
||
date_index = tm.makeDateIndex(24, freq='h', name='hourly') | ||
exp = Index(range(24), name='hourly') | ||
tm.assert_index_equal(exp, date_index.map(lambda x: x.hour)) | ||
|
||
def test_append_multiple(self): | ||
index = Index(['a', 'b', 'c', 'd', 'e', 'f']) | ||
|
||
|
@@ -1194,16 +1236,16 @@ def check_slice(in_slice, expected): | |
self.assert_index_equal(result, expected) | ||
|
||
for in_slice, expected in [ | ||
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''), | ||
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'), | ||
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'), | ||
(SLC['y'::-4], 'yb'), | ||
# absent labels | ||
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'), | ||
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'), | ||
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'), | ||
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''), | ||
(SLC['m':'m':-1], '') | ||
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''), | ||
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'), | ||
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'), | ||
(SLC['y'::-4], 'yb'), | ||
# absent labels | ||
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'), | ||
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'), | ||
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'), | ||
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''), | ||
(SLC['m':'m':-1], '') | ||
]: | ||
check_slice(in_slice, expected) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -207,19 +207,20 @@ def test_map(self): | |
ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'), | ||
ordered=True) | ||
result = ci.map(lambda x: x.lower()) | ||
exp = pd.Categorical(list('ababc'), categories=list('cba'), | ||
ordered=True) | ||
tm.assert_categorical_equal(result, exp) | ||
exp = pd.CategoricalIndex(list('ababc'), categories=list('cba'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. show an example like this in the whatsnew as well (e.g. CategoryIndex.map -> CI rather than Category now) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @jreback. Just wanted to touchbase here since I added this example and then you asked me to remove in another comment below. Did I add it in the wrong place or did you just decide it was a little overkill? Thanks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's too long for the what's new; so need to pare it down There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💯 sounds good. I shortened it up like you suggested. Let me know if you'd like any other changes. |
||
ordered=True) | ||
tm.assert_index_equal(result, exp) | ||
|
||
ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), | ||
ordered=False, name='XXX') | ||
result = ci.map(lambda x: x.lower()) | ||
exp = pd.Categorical(list('ababc'), categories=list('bac'), | ||
ordered=False) | ||
tm.assert_categorical_equal(result, exp) | ||
exp = pd.CategoricalIndex(list('ababc'), categories=list('bac'), | ||
ordered=False, name='XXX') | ||
tm.assert_index_equal(result, exp) | ||
|
||
tm.assert_numpy_array_equal(ci.map(lambda x: 1), | ||
np.array([1] * 5, dtype=np.int64)) | ||
# GH 12766: Return an index not an array | ||
tm.assert_index_equal(ci.map(lambda x: 1), | ||
Index(np.array([1] * 5, dtype=np.int64), name='XXX')) | ||
|
||
# change categories dtype | ||
ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), | ||
|
@@ -228,9 +229,9 @@ def f(x): | |
return {'A': 10, 'B': 20, 'C': 30}.get(x) | ||
|
||
result = ci.map(f) | ||
exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], | ||
ordered=False) | ||
tm.assert_categorical_equal(result, exp) | ||
exp = pd.CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], | ||
ordered=False) | ||
tm.assert_index_equal(result, exp) | ||
|
||
def test_where(self): | ||
i = self.create_index() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -123,8 +123,9 @@ def test_apply_datetimetz(self): | |
tm.assert_series_equal(result, exp) | ||
|
||
# change dtype | ||
# GH 14506 : Returned dtype changed from int32 to int64 | ||
result = s.apply(lambda x: x.hour) | ||
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32) | ||
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, I don't think this should have changed, these are normally any idea? (also this might not be the same on windows) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's true it previously gave int32, but is there any reason for that? We almost always use int64 as the default integer size, and also currently Timestamp.hour gives you back int64 when you replace the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's probably the result of using
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is actually an implementation detail (as they r stored as int32) we can change but that should be separate PR ideally just like to preserve here |
||
tm.assert_series_equal(result, exp) | ||
|
||
# not vectorized | ||
|
@@ -317,8 +318,9 @@ def test_map_datetimetz(self): | |
tm.assert_series_equal(result, exp) | ||
|
||
# change dtype | ||
# GH 14506 : Returned dtype changed from int32 to int64 | ||
result = s.map(lambda x: x.hour) | ||
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32) | ||
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) | ||
tm.assert_series_equal(result, exp) | ||
|
||
with tm.assertRaises(NotImplementedError): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,7 +27,6 @@ | |
from pandas.util.decorators import Appender, cache_readonly | ||
import pandas.types.concat as _concat | ||
import pandas.tseries.frequencies as frequencies | ||
import pandas.algos as _algos | ||
|
||
|
||
class DatelikeOps(object): | ||
|
@@ -330,11 +329,16 @@ def _nat_new(self, box=True): | |
def map(self, f): | ||
try: | ||
result = f(self) | ||
if not isinstance(result, (np.ndarray, Index)): | ||
raise TypeError | ||
|
||
# Try to use this result if we can | ||
if isinstance(result, np.ndarray): | ||
self._shallow_copy(result) | ||
|
||
if not isinstance(result, Index): | ||
raise TypeError('The map function must return an Index object') | ||
return result | ||
except Exception: | ||
return _algos.arrmap_object(self.asobject.values, f) | ||
return self.asobject.map(f) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is better, using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this might actually close another issue... |
||
|
||
def sort_values(self, return_indexer=False, ascending=True): | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this needs a ref tag as well
.. _whatsnew.api_breaking.index_map