-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: tolerance now takes list-like argument for reindex and get_indexer. #17367
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
bbe248c
44b08f2
e190435
700b20a
2b549b1
7e7051a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2470,9 +2470,10 @@ def reindex_like(self, other, method=None, copy=True, limit=None, | |
Maximum number of consecutive labels to fill for inexact matches. | ||
tolerance : optional | ||
Maximum distance between labels of the other object and this | ||
object for inexact matches. | ||
object for inexact matches. Can be list-like. | ||
|
||
.. versionadded:: 0.17.0 | ||
.. versionadded:: 0.21.0 (list-like tolerance) | ||
|
||
Notes | ||
----- | ||
|
@@ -2860,7 +2861,14 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, | |
matches. The values of the index at the matching locations must | ||
satisfy the equation ``abs(index[indexer] - target) <= tolerance``. | ||
|
||
Tolerance may be a scalar value, which applies the same tolerance | ||
to all values, or list-like, which applies variable tolerance per | ||
element. List-like includes list, tuple, array, Series, and must be | ||
the same size as the index and its dtype must exactly match the | ||
index's type. | ||
|
||
.. versionadded:: 0.17.0 | ||
.. versionadded:: 0.21.0 (list-like tolerance) | ||
|
||
Examples | ||
-------- | ||
|
@@ -3120,7 +3128,14 @@ def _reindex_multi(self, axes, copy, fill_value): | |
matches. The values of the index at the matching locations must | ||
satisfy the equation ``abs(index[indexer] - target) <= tolerance``. | ||
|
||
Tolerance may be a scalar value, which applies the same tolerance | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same (bonus points if we can use a _shared_docs here to avoid code duplication) |
||
to all values, or list-like, which applies variable tolerance per | ||
element. List-like includes list, tuple, array, Series, and must be | ||
the same size as the index and its dtype must exactly match the | ||
index's type. | ||
|
||
.. versionadded:: 0.17.0 | ||
.. versionadded:: 0.21.0 (list-like tolerance) | ||
|
||
Examples | ||
-------- | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2484,7 +2484,14 @@ def _get_unique_index(self, dropna=False): | |
the index at the matching location must satisfy the equation | ||
``abs(index[loc] - key) <= tolerance``. | ||
|
||
Tolerance may be a scalar | ||
value, which applies the same tolerance to all values, or | ||
list-like, which applies variable tolerance per element. List-like | ||
includes list, tuple, array, Series, and must be the same size as | ||
the index and its dtype must exactly match the index's type. | ||
|
||
.. versionadded:: 0.17.0 | ||
.. versionadded:: 0.21.0 (list-like tolerance) | ||
|
||
Returns | ||
------- | ||
|
@@ -2627,7 +2634,14 @@ def _get_level_values(self, level): | |
matches. The values of the index at the matching locations must | ||
satisfy the equation ``abs(index[indexer] - target) <= tolerance``. | ||
|
||
Tolerance may be a scalar value, which applies the same tolerance | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same. actually let's fix the shared_docs things first in another PR (or here is ok too). too much doc-string duplication. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense. How would I do that though? This (and the 3 others above) is already in a _shared_docs, so this would require another layer of abstraction above _shared_docs. Quite a bit of duplication with the existing tolerance docstrings as well. |
||
to all values, or list-like, which applies variable tolerance per | ||
element. List-like includes list, tuple, array, Series, and must be | ||
the same size as the index and its dtype must exactly match the | ||
index's type. | ||
|
||
.. versionadded:: 0.17.0 | ||
.. versionadded:: 0.21.0 (list-like tolerance) | ||
|
||
Examples | ||
-------- | ||
|
@@ -2647,7 +2661,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): | |
method = missing.clean_reindex_fill_method(method) | ||
target = _ensure_index(target) | ||
if tolerance is not None: | ||
tolerance = self._convert_tolerance(tolerance) | ||
tolerance = self._convert_tolerance(tolerance, target) | ||
|
||
# Treat boolean labels passed to a numeric index as not found. Without | ||
# this fix False and True would be treated as 0 and 1 respectively. | ||
|
@@ -2683,10 +2697,15 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): | |
'backfill or nearest reindexing') | ||
|
||
indexer = self._engine.get_indexer(target._values) | ||
|
||
return _ensure_platform_int(indexer) | ||
|
||
def _convert_tolerance(self, tolerance): | ||
def _convert_tolerance(self, tolerance, target): | ||
# override this method on subclasses | ||
tolerance = np.asarray(tolerance) | ||
if target.size != tolerance.size and tolerance.size > 1: | ||
raise ValueError('list-like tolerance size must match ' | ||
'target index size') | ||
return tolerance | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of adding this function, just do
you may be able to get away with just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. eg. this |
||
|
||
def _get_fill_indexer(self, target, method, limit=None, tolerance=None): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
|
||
from pandas import compat | ||
from pandas.compat.numpy import function as nv | ||
from pandas.core.tools.timedeltas import to_timedelta | ||
|
||
import numpy as np | ||
from pandas.core.dtypes.common import ( | ||
|
@@ -431,13 +432,12 @@ def asobject(self): | |
from pandas.core.index import Index | ||
return Index(self._box_values(self.asi8), name=self.name, dtype=object) | ||
|
||
def _convert_tolerance(self, tolerance): | ||
try: | ||
return Timedelta(tolerance).to_timedelta64() | ||
except ValueError: | ||
raise ValueError('tolerance argument for %s must be convertible ' | ||
'to Timedelta: %r' | ||
% (type(self).__name__, tolerance)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use
|
||
def _convert_tolerance(self, tolerance, target): | ||
tolerance = np.asarray(to_timedelta(tolerance, box=False)) | ||
if target.size != tolerance.size and tolerance.size > 1: | ||
raise ValueError('list-like tolerance size must match ' | ||
'target index size') | ||
return tolerance | ||
|
||
def _maybe_mask_results(self, result, fill_value=None, convert=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -641,12 +641,17 @@ def to_timestamp(self, freq=None, how='start'): | |
return DatetimeIndex(new_data, freq='infer', name=self.name) | ||
|
||
def _maybe_convert_timedelta(self, other): | ||
if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): | ||
if isinstance( | ||
other, (timedelta, np.timedelta64, offsets.Tick, np.ndarray)): | ||
offset = frequencies.to_offset(self.freq.rule_code) | ||
if isinstance(offset, offsets.Tick): | ||
nanos = tslib._delta_to_nanoseconds(other) | ||
if isinstance(other, np.ndarray): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can just d
|
||
nanos = np.vectorize(tslib._delta_to_nanoseconds)(other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the array version of this function is almost trivial There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually ignore the above this is ok here |
||
else: | ||
nanos = tslib._delta_to_nanoseconds(other) | ||
offset_nanos = tslib._delta_to_nanoseconds(offset) | ||
if nanos % offset_nanos == 0: | ||
check = np.all(nanos % offset_nanos == 0) | ||
if check: | ||
return nanos // offset_nanos | ||
elif isinstance(other, offsets.DateOffset): | ||
freqstr = other.rule_code | ||
|
@@ -782,7 +787,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): | |
target = target.asi8 | ||
|
||
if tolerance is not None: | ||
tolerance = self._convert_tolerance(tolerance) | ||
tolerance = self._convert_tolerance(tolerance, target) | ||
return Index.get_indexer(self._int64index, target, method, | ||
limit, tolerance) | ||
|
||
|
@@ -825,7 +830,8 @@ def get_loc(self, key, method=None, tolerance=None): | |
try: | ||
ordinal = tslib.iNaT if key is tslib.NaT else key.ordinal | ||
if tolerance is not None: | ||
tolerance = self._convert_tolerance(tolerance) | ||
tolerance = self._convert_tolerance(tolerance, | ||
np.asarray(key)) | ||
return self._int64index.get_loc(ordinal, method, tolerance) | ||
|
||
except KeyError: | ||
|
@@ -908,8 +914,12 @@ def _get_string_slice(self, key): | |
return slice(self.searchsorted(t1.ordinal, side='left'), | ||
self.searchsorted(t2.ordinal, side='right')) | ||
|
||
def _convert_tolerance(self, tolerance): | ||
tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance) | ||
def _convert_tolerance(self, tolerance, target): | ||
tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance, | ||
target) | ||
if target.size != tolerance.size and tolerance.size > 1: | ||
raise ValueError('list-like tolerance size must match ' | ||
'target index size') | ||
return self._maybe_convert_timedelta(tolerance) | ||
|
||
def insert(self, loc, item): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,8 +83,12 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): | |
elif isinstance(arg, ABCIndexClass): | ||
return _convert_listlike(arg, unit=unit, box=box, | ||
errors=errors, name=arg.name) | ||
elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1: | ||
return _convert_listlike(arg, unit=unit, box=box, errors=errors) | ||
elif is_list_like(arg) and getattr(arg, 'ndim', 1) <= 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you separate these into 2 conditions on the main if/else, IOW
|
||
if getattr(arg, 'ndim', 1) == 0: | ||
# extract array scalar and process below | ||
arg = arg.item() | ||
else: | ||
return _convert_listlike(arg, unit=unit, box=box, errors=errors) | ||
elif getattr(arg, 'ndim', 1) > 1: | ||
raise TypeError('arg must be a string, timedelta, list, tuple, ' | ||
'1-d array, or Series') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a blank line where you have new text (I think it will render ok)