Skip to content

Commit 0468cad

Browse files
committed
ENH: add tolerance to get_indexer, get_loc and reindex
1 parent 13cb1a7 commit 0468cad

File tree

12 files changed

+328
-56
lines changed

12 files changed

+328
-56
lines changed

doc/source/basics.rst

+24
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,30 @@ Note that the same result could have been achieved using
11001100
increasing or decreasing. :meth:`~Series.fillna` and :meth:`~Series.interpolate`
11011101
will not make any checks on the order of the index.
11021102

1103+
.. _basics.limits_on_reindex_fill:
1104+
1105+
Limits on filling while reindexing
1106+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1107+
1108+
The ``limit`` and ``tolerance`` arguments provide additional control over
1109+
filling while reindexing. Limit specifies the maximum count of consecutive
1110+
matches:
1111+
1112+
.. ipython:: python
1113+
1114+
ts2.reindex(ts.index, method='ffill', limit=1)
1115+
1116+
In contrast, tolerance specifies the maximum distance between the index and
1117+
indexer values:
1118+
1119+
.. ipython:: python
1120+
1121+
ts2.reindex(ts.index, method='ffill', tolerance='1 day')
1122+
1123+
Notice that when used on a ``DatetimeIndex``, ``TimedeltaIndex`` or
1124+
``PeriodIndex``, ``tolerance`` will be coerced into a ``Timedelta`` if possible.
1125+
This allows you to specify tolerance with appropriate strings.
1126+
11031127
.. _basics.drop:
11041128

11051129
Dropping labels from an axis

doc/source/whatsnew/v0.17.0.txt

+16
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,22 @@ Other enhancements
180180
s.drop_duplicates(keep=False)
181181

182182

183+
- Reindex now has a ``tolerance`` argument that allows for finer control of :ref:`basics.limits_on_reindex_fill`:
184+
185+
.. ipython:: python
186+
187+
df = pd.DataFrame({'x': range(5), 't': pd.date_range('2000-01-01', periods=5)})
188+
df.reindex([0.1, 1.9, 3.5], method='nearest', tolerance=0.2)
189+
190+
When used on a ``DatetimeIndex``, ``TimedeltaIndex`` or ``PeriodIndex``, ``tolerance`` will be coerced into a ``Timedelta`` if possible. This allows you to specify tolerance with a string:
191+
192+
.. ipython:: python
193+
194+
df = df.set_index('t')
195+
df.reindex(pd.to_datetime(['1999-12-31']), method='nearest', tolerance='1 day')
196+
197+
``tolerance`` is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods.
198+
183199
.. _whatsnew_0170.api:
184200

185201
.. _whatsnew_0170.api_breaking:

pandas/core/frame.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -2518,33 +2518,36 @@ def lookup(self, row_labels, col_labels):
25182518
#----------------------------------------------------------------------
25192519
# Reindexing and alignment
25202520

2521-
def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
2521+
def _reindex_axes(self, axes, level, limit, tolerance, method,
2522+
fill_value, copy):
25222523
frame = self
25232524

25242525
columns = axes['columns']
25252526
if columns is not None:
25262527
frame = frame._reindex_columns(columns, copy, level, fill_value,
2527-
limit)
2528+
limit, tolerance)
25282529

25292530
index = axes['index']
25302531
if index is not None:
25312532
frame = frame._reindex_index(index, method, copy, level,
2532-
fill_value, limit)
2533+
fill_value, limit, tolerance)
25332534

25342535
return frame
25352536

25362537
def _reindex_index(self, new_index, method, copy, level, fill_value=NA,
2537-
limit=None):
2538+
limit=None, tolerance=None):
25382539
new_index, indexer = self.index.reindex(new_index, method, level,
2539-
limit=limit)
2540+
limit=limit,
2541+
tolerance=tolerance)
25402542
return self._reindex_with_indexers({0: [new_index, indexer]},
25412543
copy=copy, fill_value=fill_value,
25422544
allow_dups=False)
25432545

25442546
def _reindex_columns(self, new_columns, copy, level, fill_value=NA,
2545-
limit=None):
2547+
limit=None, tolerance=None):
25462548
new_columns, indexer = self.columns.reindex(new_columns, level=level,
2547-
limit=limit)
2549+
limit=limit,
2550+
tolerance=tolerance)
25482551
return self._reindex_with_indexers({1: [new_columns, indexer]},
25492552
copy=copy, fill_value=fill_value,
25502553
allow_dups=False)

pandas/core/generic.py

+32-10
Original file line numberDiff line numberDiff line change
@@ -922,7 +922,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
922922
in the store wherever possible
923923
fletcher32 : bool, default False
924924
If applying compression use the fletcher32 checksum
925-
dropna : boolean, default False.
925+
dropna : boolean, default False.
926926
If true, ALL nan rows will not be written to store.
927927
928928
"""
@@ -1551,7 +1551,8 @@ def select(self, crit, axis=0):
15511551

15521552
return self.reindex(**{axis_name: new_axis})
15531553

1554-
def reindex_like(self, other, method=None, copy=True, limit=None):
1554+
def reindex_like(self, other, method=None, copy=True, limit=None,
1555+
tolerance=None):
15551556
""" return an object with matching indices to myself
15561557
15571558
Parameters
@@ -1560,7 +1561,12 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
15601561
method : string or None
15611562
copy : boolean, default True
15621563
limit : int, default None
1563-
Maximum size gap to forward or backward fill
1564+
Maximum number of consecutive labels to fill for inexact matches.
1565+
tolerance : optional
1566+
Maximum distance between labels of the other object and this
1567+
object for inexact matches.
1568+
1569+
.. versionadded:: 0.17.0
15641570
15651571
Notes
15661572
-----
@@ -1572,7 +1578,8 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
15721578
reindexed : same as input
15731579
"""
15741580
d = other._construct_axes_dict(axes=self._AXIS_ORDERS,
1575-
method=method, copy=copy, limit=limit)
1581+
method=method, copy=copy, limit=limit,
1582+
tolerance=tolerance)
15761583

15771584
return self.reindex(**d)
15781585

@@ -1736,7 +1743,13 @@ def sort_index(self, axis=0, ascending=True):
17361743
Value to use for missing values. Defaults to NaN, but can be any
17371744
"compatible" value
17381745
limit : int, default None
1739-
Maximum size gap to forward or backward fill
1746+
Maximum number of consecutive elements to forward or backward fill
1747+
tolerance : optional
1748+
Maximum distance between original and new labels for inexact
1749+
matches. The values of the index at the matching locations must
1750+
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
1751+
1752+
.. versionadded:: 0.17.0
17401753
17411754
Examples
17421755
--------
@@ -1758,6 +1771,7 @@ def reindex(self, *args, **kwargs):
17581771
level = kwargs.pop('level', None)
17591772
copy = kwargs.pop('copy', True)
17601773
limit = kwargs.pop('limit', None)
1774+
tolerance = kwargs.pop('tolerance', None)
17611775
fill_value = kwargs.pop('fill_value', np.nan)
17621776

17631777
if kwargs:
@@ -1782,10 +1796,11 @@ def reindex(self, *args, **kwargs):
17821796
pass
17831797

17841798
# perform the reindex on the axes
1785-
return self._reindex_axes(axes, level, limit,
1799+
return self._reindex_axes(axes, level, limit, tolerance,
17861800
method, fill_value, copy).__finalize__(self)
17871801

1788-
def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
1802+
def _reindex_axes(self, axes, level, limit, tolerance, method,
1803+
fill_value, copy):
17891804
""" perform the reindex for all the axes """
17901805
obj = self
17911806
for a in self._AXIS_ORDERS:
@@ -1795,7 +1810,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
17951810

17961811
ax = self._get_axis(a)
17971812
new_index, indexer = ax.reindex(
1798-
labels, level=level, limit=limit, method=method)
1813+
labels, level=level, limit=limit, tolerance=tolerance,
1814+
method=method)
17991815

18001816
axis = self._get_axis_number(a)
18011817
obj = obj._reindex_with_indexers(
@@ -1836,7 +1852,13 @@ def _reindex_multi(self, axes, copy, fill_value):
18361852
Broadcast across a level, matching Index values on the
18371853
passed MultiIndex level
18381854
limit : int, default None
1839-
Maximum size gap to forward or backward fill
1855+
Maximum number of consecutive elements to forward or backward fill
1856+
tolerance : optional
1857+
Maximum distance between original and new labels for inexact
1858+
matches. The values of the index at the matching locations must
1859+
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
1860+
1861+
.. versionadded:: 0.17.0
18401862
18411863
Examples
18421864
--------
@@ -2910,7 +2932,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
29102932
use the actual numerical values of the index
29112933
* 'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all
29122934
wrappers around the scipy interpolation methods of similar
2913-
names. These use the actual numerical values of the index. See
2935+
names. These use the actual numerical values of the index. See
29142936
the scipy documentation for more on their behavior:
29152937
http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
29162938
http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html

0 commit comments

Comments
 (0)