-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Allow indices to be mapped through dictionaries or series #15081
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
c8d2a80
9a67ffd
2b70597
80ca2e2
00165c4
2f019c5
f6a2404
0c72a38
73c276b
a858467
68479a3
80a4c9c
30e7e7a
25bba86
51c5b2b
8f0198e
80fad28
5006b24
4134641
096b0e6
c0f3b76
dd0b7e9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,6 @@ | |
from pandas.compat.numpy import function as nv | ||
from pandas import compat | ||
|
||
|
||
from pandas.core.dtypes.generic import ( | ||
ABCSeries, | ||
ABCMultiIndex, | ||
|
@@ -2821,6 +2820,27 @@ def get_indexer_for(self, target, **kwargs): | |
indexer, _ = self.get_indexer_non_unique(target, **kwargs) | ||
return indexer | ||
|
||
_index_shared_docs['_get_values_from_dict'] = """ | ||
Return the values of the input dictionary in the order the keys are | ||
in the index. np.nan is returned for index values not in the | ||
dictionary. | ||
|
||
Parameters | ||
---------- | ||
data : dict | ||
The dictionary from which to extract the values | ||
|
||
Returns | ||
------- | ||
np.array | ||
|
||
""" | ||
|
||
@Appender(_index_shared_docs['_get_values_from_dict']) | ||
def _get_values_from_dict(self, data): | ||
return lib.fast_multiget(data, self.values, | ||
default=np.nan) | ||
|
||
def _maybe_promote(self, other): | ||
# A hack, but it works | ||
from pandas.core.indexes.datetimes import DatetimeIndex | ||
|
@@ -2859,13 +2879,16 @@ def groupby(self, values): | |
|
||
return result | ||
|
||
def map(self, mapper): | ||
"""Apply mapper function to an index. | ||
def map(self, arg, na_action=None): | ||
"""Map values of Series using input correspondence (which can be a | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The first sentence should be a single line so that it looks OK in the autosummary table. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
dict, Series, or function) | ||
|
||
Parameters | ||
---------- | ||
mapper : callable | ||
Function to be applied. | ||
arg : function, dict, or Series | ||
na_action : {None, 'ignore'} | ||
If 'ignore', propagate NA values, without passing them to the | ||
mapping function | ||
|
||
Returns | ||
------- | ||
|
@@ -2875,15 +2898,23 @@ def map(self, mapper): | |
a MultiIndex will be returned. | ||
|
||
""" | ||
|
||
from .multi import MultiIndex | ||
mapped_values = self._arrmap(self.values, mapper) | ||
new_values = super(Index, self)._map_values( | ||
arg, na_action=na_action) | ||
attributes = self._get_attributes_dict() | ||
if mapped_values.size and isinstance(mapped_values[0], tuple): | ||
return MultiIndex.from_tuples(mapped_values, | ||
names=attributes.get('name')) | ||
if new_values.size and isinstance(new_values[0], tuple): | ||
if isinstance(self, MultiIndex): | ||
names = self.names | ||
elif attributes.get('name'): | ||
names = [attributes.get('name')] * len(new_values[0]) | ||
else: | ||
names = None | ||
return MultiIndex.from_tuples(new_values, | ||
names=names) | ||
|
||
attributes['copy'] = False | ||
return Index(mapped_values, **attributes) | ||
return Index(new_values, **attributes) | ||
|
||
def isin(self, values, level=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -352,7 +352,7 @@ def map(self, f): | |
|
||
# Try to use this result if we can | ||
if isinstance(result, np.ndarray): | ||
self._shallow_copy(result) | ||
result = Index(result) | ||
|
||
if not isinstance(result, Index): | ||
raise TypeError('The map function must return an Index object') | ||
|
@@ -698,6 +698,14 @@ def __rsub__(self, other): | |
def _add_delta(self, other): | ||
return NotImplemented | ||
|
||
@Appender(_index_shared_docs['_get_values_from_dict']) | ||
def _get_values_from_dict(self, data): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems odd that you are returning np.nan for a datetimelike. Do tests hit this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would assume it does get hit indirectly, as it was essentially pulled into the subclasses from the pd.Series instantiation. It essentially replicates the behavior that was previously in lines 188-203 of pandas/core/series.py, allowing each object to handle it as it sees fit rather than doing it in an if conditional. Personally, this way makes more sense to me, but since I ended up not making use of this functionality outside of the Series instantiation, I'm happy to revert the changes if you'd prefer. |
||
if len(data): | ||
return np.array([data.get(i, np.nan) | ||
for i in self.asobject.values]) | ||
|
||
return np.array([np.nan]) | ||
|
||
def _add_delta_td(self, other): | ||
# add a delta of a timedeltalike | ||
# return the i8 result view | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1451,6 +1451,17 @@ def get_value_maybe_box(self, series, key): | |
key, tz=self.tz) | ||
return _maybe_box(self, values, series, key) | ||
|
||
@Appender(_index_shared_docs['_get_values_from_dict']) | ||
def _get_values_from_dict(self, data): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this is needed for datetimelike, then push this to the superclass rather than having two implementations. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See the above comment. |
||
if len(data): | ||
# coerce back to datetime objects for lookup | ||
data = com._dict_compat(data) | ||
return lib.fast_multiget(data, | ||
self.asobject.values, | ||
default=np.nan) | ||
|
||
return np.array([np.nan]) | ||
|
||
def get_loc(self, key, method=None, tolerance=None): | ||
""" | ||
Get integer location for requested label | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically, we need to deprecate and rename mapper -> arg, and add a versionadded (0.21.0) for na_action.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this something I need to do? If so can you show me how you've done this in the past? Otherwise should I just change arg -> mapper?