
DOC: use shared_docs for Index.get_indexer, get_indexer_non_unique #15411

Merged (2 commits, Feb 15, 2017)
1 change: 1 addition & 0 deletions pandas/compat/numpy/__init__.py
@@ -67,6 +67,7 @@ def np_array_datetime64_compat(arr, *args, **kwargs):

return np.array(arr, *args, **kwargs)


__all__ = ['np',
'_np_version_under1p8',
'_np_version_under1p9',
7 changes: 7 additions & 0 deletions pandas/compat/numpy/function.py
@@ -55,6 +55,7 @@ def __call__(self, args, kwargs, fname=None,
raise ValueError("invalid validation method "
"'{method}'".format(method=method))


ARGMINMAX_DEFAULTS = dict(out=None)
validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin',
method='both', max_fname_arg_count=1)
@@ -97,6 +98,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs):
validate_argmax(args, kwargs)
return skipna


ARGSORT_DEFAULTS = OrderedDict()
ARGSORT_DEFAULTS['axis'] = -1
ARGSORT_DEFAULTS['kind'] = 'quicksort'
@@ -121,6 +123,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs):
validate_argsort(args, kwargs, max_fname_arg_count=1)
return ascending


CLIP_DEFAULTS = dict(out=None)
validate_clip = CompatValidator(CLIP_DEFAULTS, fname='clip',
method='both', max_fname_arg_count=3)
@@ -141,6 +144,7 @@ def validate_clip_with_axis(axis, args, kwargs):
validate_clip(args, kwargs)
return axis


COMPRESS_DEFAULTS = OrderedDict()
COMPRESS_DEFAULTS['axis'] = None
COMPRESS_DEFAULTS['out'] = None
@@ -170,6 +174,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
validate_cum_func(args, kwargs, fname=name)
return skipna


LOGICAL_FUNC_DEFAULTS = dict(out=None)
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs')

@@ -236,6 +241,7 @@ def validate_take_with_convert(convert, args, kwargs):
validate_take(args, kwargs, max_fname_arg_count=3, method='both')
return convert


TRANSPOSE_DEFAULTS = dict(axes=None)
validate_transpose = CompatValidator(TRANSPOSE_DEFAULTS, fname='transpose',
method='both', max_fname_arg_count=0)
@@ -318,6 +324,7 @@ def validate_groupby_func(name, args, kwargs, allowed=None):
"with groupby. Use .groupby(...)."
"{func}() instead".format(func=name)))


RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod',
'mean', 'std', 'var')

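Aside: the validate_* objects touched in this file all follow one pattern: pandas methods keep numpy-compatible signatures (out=, axis=, ...) but only accept the default values for those arguments. A rough sketch of that idea, using a hypothetical validate_kwargs helper in place of the real CompatValidator class:

def validate_kwargs(fname, kwargs, compat_defaults):
    # Reject keywords that are not part of the numpy-compatible signature.
    for key, value in kwargs.items():
        if key not in compat_defaults:
            raise TypeError("{fname}() got an unexpected keyword argument "
                            "'{key}'".format(fname=fname, key=key))
        # The keyword exists only for numpy signature compatibility;
        # anything other than its default value is rejected.
        if value != compat_defaults[key]:
            raise ValueError("the '{key}' parameter is not supported in "
                             "the pandas implementation of {fname}()"
                             .format(key=key, fname=fname))

ARGMINMAX_DEFAULTS = dict(out=None)
validate_kwargs('argmin', {'out': None}, ARGMINMAX_DEFAULTS)  # passes silently
# validate_kwargs('argmin', {'out': []}, ARGMINMAX_DEFAULTS)  # would raise ValueError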
1 change: 1 addition & 0 deletions pandas/computation/expr.py
@@ -669,6 +669,7 @@ def visitor(x, y):
operands = node.values
return reduce(visitor, operands)


# ast.Call signature changed on 3.5,
# conditionally change which methods is named
# visit_Call depending on Python version, #11097
2 changes: 2 additions & 0 deletions pandas/core/algorithms.py
@@ -926,6 +926,7 @@ def _finalize_nsmallest(arr, kth_val, n, keep, narr):
else:
return inds


_dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'}


@@ -959,6 +960,7 @@ def _hashtable_algo(f, values, return_dtype=None):
# use Object
return f(htable.PyObjectHashTable, _ensure_object)


_hashtables = {
'float64': (htable.Float64HashTable, htable.Float64Vector),
'uint64': (htable.UInt64HashTable, htable.UInt64Vector),
1 change: 1 addition & 0 deletions pandas/core/config.py
@@ -804,6 +804,7 @@ def inner(x):

return inner


# common type validators, for convenience
# usage: register_option(... , validator = is_int)
is_int = is_type_factory(int)
2 changes: 2 additions & 0 deletions pandas/core/config_init.py
@@ -278,6 +278,7 @@ def mpl_style_cb(key):

return val


with cf.config_prefix('display'):
cf.register_option('precision', 6, pc_precision_doc, validator=is_int)
cf.register_option('float_format', None, float_format_doc,
@@ -380,6 +381,7 @@ def use_inf_as_null_cb(key):
from pandas.types.missing import _use_inf_as_null
_use_inf_as_null(key)


with cf.config_prefix('mode'):
cf.register_option('use_inf_as_null', False, use_inf_as_null_doc,
cb=use_inf_as_null_cb)
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -5741,9 +5741,9 @@ def _from_nested_dict(data):
def _put_str(s, space):
return ('%s' % s)[:space].ljust(space)


# ----------------------------------------------------------------------
# Add plotting methods to DataFrame

DataFrame.plot = base.AccessorProperty(gfx.FramePlotMethods,
gfx.FramePlotMethods)
DataFrame.hist = gfx.hist_frame
2 changes: 2 additions & 0 deletions pandas/core/indexing.py
@@ -36,6 +36,7 @@ def get_indexers_list():
('iat', _iAtIndexer),
]


# "null slice"
_NS = slice(None, None)

@@ -1850,6 +1851,7 @@ def _convert_key(self, key, is_setter=False):
"indexers")
return key


# 32-bit floating point machine epsilon
_eps = np.finfo('f4').eps

2 changes: 1 addition & 1 deletion pandas/formats/format.py
@@ -2479,9 +2479,9 @@ def _has_names(index):
else:
return index.name is not None


# -----------------------------------------------------------------------------
# Global formatting options

_initial_defencoding = None


41 changes: 32 additions & 9 deletions pandas/indexes/base.py
@@ -65,6 +65,7 @@
_unsortable_types = frozenset(('mixed', 'mixed-integer'))

_index_doc_kwargs = dict(klass='Index', inplace='',
target_klass='Index',
unique='Index', duplicated='np.ndarray')
_index_shared_docs = dict()

@@ -1605,7 +1606,7 @@ def _append_same_dtype(self, to_concat, name):
numpy.ndarray.take
"""

@Appender(_index_shared_docs['take'])
@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True,
fill_value=None, **kwargs):
nv.validate_take(tuple(), kwargs)
@@ -2350,15 +2351,14 @@ def get_level_values(self, level):
self._validate_index_level(level)
return self

def get_indexer(self, target, method=None, limit=None, tolerance=None):
"""
_index_shared_docs['get_indexer'] = """
Compute indexer and mask for new index given the current index. The
indexer should then be used as an input to ndarray.take to align the
current data to the new index.

Parameters
----------
target : Index
target : %(target_klass)s
method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
* default: exact matches only.
* pad / ffill: find the PREVIOUS index value if no exact match.
@@ -2387,6 +2387,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
positions matches the corresponding target values. Missing values
in the target are marked by -1.
"""

@Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
method = missing.clean_reindex_fill_method(method)
target = _ensure_index(target)
if tolerance is not None:
@@ -2496,11 +2499,28 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance):
indexer = np.where(distance <= tolerance, indexer, -1)
return indexer

_index_shared_docs['get_indexer_non_unique'] = """
Compute indexer and mask for new index given the current index. The
indexer should then be used as an input to ndarray.take to align the
current data to the new index.

Parameters
----------
target : %(target_klass)s

Returns
-------
indexer : ndarray of int
Integers from 0 to n - 1 indicating that the index at these
positions matches the corresponding target values. Missing values
in the target are marked by -1.
missing : ndarray of int
An indexer into the target of the values not found.
These correspond to the -1 in the indexer array.
"""

@Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
def get_indexer_non_unique(self, target):
""" return an indexer suitable for taking from a non unique index
return the labels in the same order as the target, and
return a missing indexer into the target (missing are marked as -1
in the indexer); target must be an iterable """
target = _ensure_index(target)
pself, ptarget = self._possibly_promote(target)
if pself is not self or ptarget is not target:
@@ -2516,7 +2536,10 @@ def get_indexer_non_unique(self, target):
return Index(indexer), missing

def get_indexer_for(self, target, **kwargs):
""" guaranteed return of an indexer even when non-unique """
"""
Guaranteed return of an indexer even when non-unique.

This dispatches to get_indexer or get_indexer_non_unique as appropriate.
"""
if self.is_unique:
return self.get_indexer(target, **kwargs)
indexer, _ = self.get_indexer_non_unique(target, **kwargs)
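The substance of the PR is in this file: the get_indexer and get_indexer_non_unique docstrings move into _index_shared_docs as templates with a %(target_klass)s placeholder, which each class fills through @Appender. A minimal self-contained sketch of that mechanism follows; the Appender class here is a simplified stand-in for pandas.util.decorators.Appender, not its actual implementation:

class Appender(object):
    # Append `addendum` to the decorated function's docstring.
    def __init__(self, addendum):
        self.addendum = addendum

    def __call__(self, func):
        func.__doc__ = (func.__doc__ or '') + self.addendum
        return func

_shared_docs = dict()
_shared_docs['get_indexer'] = """
Compute indexer and mask for new index given the current index.

Parameters
----------
target : %(target_klass)s
"""

_doc_kwargs = dict(target_klass='Index')

class Index(object):
    @Appender(_shared_docs['get_indexer'] % _doc_kwargs)
    def get_indexer(self, target):
        pass

# A subclass reuses the same template but substitutes its own class name,
# just as category.py and multi.py do below with their _index_doc_kwargs.
_ci_doc_kwargs = dict(_doc_kwargs, target_klass='CategoricalIndex')

class CategoricalIndex(Index):
    @Appender(_shared_docs['get_indexer'] % _ci_doc_kwargs)
    def get_indexer(self, target):
        pass

print(Index.get_indexer.__doc__)             # says "target : Index"
print(CategoricalIndex.get_indexer.__doc__)  # says "target : CategoricalIndex"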
40 changes: 7 additions & 33 deletions pandas/indexes/category.py
@@ -18,6 +18,8 @@
import pandas.core.base as base
import pandas.core.missing as missing
import pandas.indexes.base as ibase
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))


class CategoricalIndex(Index, base.PandasDelegate):
@@ -289,7 +291,7 @@ def _engine(self):
def is_unique(self):
return not self.duplicated().any()

@Appender(base._shared_docs['unique'] % ibase._index_doc_kwargs)
@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
def unique(self):
result = base.IndexOpsMixin.unique(self)
# CategoricalIndex._shallow_copy uses keeps original categories
@@ -299,7 +301,7 @@ def unique(self):

@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
False: 'first'})
@Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs)
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
def duplicated(self, keep='first'):
from pandas.hashtable import duplicated_int64
codes = self.codes.astype('i8')
@@ -425,34 +427,8 @@ def _reindex_non_unique(self, target):

return new_target, indexer, new_indexer

@Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
"""
Compute indexer and mask for new index given the current index. The
indexer should be then used as an input to ndarray.take to align the
current data to the new index. The mask determines whether labels are
found or not in the current index
Parameters
----------
target : MultiIndex or Index (of tuples)
method : {'pad', 'ffill', 'backfill', 'bfill'}
pad / ffill: propagate LAST valid observation forward to next valid
backfill / bfill: use NEXT valid observation to fill gap
Notes
-----
This is a low-level method and probably should be used at your own risk
Examples
--------
>>> indexer, mask = index.get_indexer(new_index)
>>> new_values = cur_values.take(indexer)
>>> new_values[-mask] = np.nan
Returns
-------
(indexer, mask) : (ndarray, ndarray)
"""
method = missing.clean_reindex_fill_method(method)
target = ibase._ensure_index(target)

@@ -472,10 +448,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):

return _ensure_platform_int(indexer)

@Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
def get_indexer_non_unique(self, target):
""" this is the same for a CategoricalIndex for get_indexer; the API
returns the missing values as well
"""
target = ibase._ensure_index(target)

if isinstance(target, CategoricalIndex):
@@ -497,7 +471,7 @@ def _convert_list_indexer(self, keyarr, kind=None):

return None

@Appender(_index_shared_docs['take'])
@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True,
fill_value=None, **kwargs):
nv.validate_take(tuple(), kwargs)
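To make the shared get_indexer docstring concrete: the returned array holds, for each target value, its position in the index, with -1 marking values not found, and it is meant to feed ndarray.take. A small illustration (outputs in the comments are indicative; display formatting varies by pandas version):

import numpy as np
import pandas as pd

idx = pd.Index(['a', 'b', 'c'])
indexer = idx.get_indexer(pd.Index(['b', 'd', 'a']))
print(indexer)  # [ 1 -1  0]  -- 'd' is not in idx, hence -1

# Align data to the new index, as the docstring describes:
values = np.array([10.0, 20.0, 30.0])
aligned = np.where(indexer == -1, np.nan, values.take(indexer))
print(aligned)  # [ 20.  nan  10.]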
40 changes: 11 additions & 29 deletions pandas/indexes/multi.py
@@ -43,6 +43,10 @@
_get_na_value, InvalidIndexError,
_index_shared_docs)
import pandas.indexes.base as ibase
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
dict(klass='MultiIndex',
target_klass='MultiIndex or list of tuples'))


class MultiIndex(Index):
@@ -755,7 +759,7 @@ def f(k, stringify):

@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
False: 'first'})
@Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs)
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
def duplicated(self, keep='first'):
from pandas.core.sorting import get_group_index
from pandas.hashtable import duplicated_int64
@@ -1244,7 +1248,7 @@ def __getitem__(self, key):
names=self.names, sortorder=sortorder,
verify_integrity=False)

@Appender(_index_shared_docs['take'])
@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True,
fill_value=None, **kwargs):
nv.validate_take(tuple(), kwargs)
@@ -1564,34 +1568,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):

return new_index, indexer

@Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
"""
Compute indexer and mask for new index given the current index. The
indexer should be then used as an input to ndarray.take to align the
current data to the new index. The mask determines whether labels are
found or not in the current index
Parameters
----------
target : MultiIndex or Index (of tuples)
method : {'pad', 'ffill', 'backfill', 'bfill'}
pad / ffill: propagate LAST valid observation forward to next valid
backfill / bfill: use NEXT valid observation to fill gap
Notes
-----
This is a low-level method and probably should be used at your own risk
Examples
--------
>>> indexer, mask = index.get_indexer(new_index)
>>> new_values = cur_values.take(indexer)
>>> new_values[-mask] = np.nan
Returns
-------
(indexer, mask) : (ndarray, ndarray)
"""
method = missing.clean_reindex_fill_method(method)
target = _ensure_index(target)

@@ -1633,6 +1611,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):

return _ensure_platform_int(indexer)

@Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
def get_indexer_non_unique(self, target):
return super(MultiIndex, self).get_indexer_non_unique(target)

def reindex(self, target, method=None, level=None, limit=None,
tolerance=None):
"""
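Finally, get_indexer_non_unique, now documented from the same template: unlike get_indexer it tolerates duplicate labels, returning every matching position plus a second array that indexes the target values which were not found. A brief illustration (exact return types have varied across pandas versions, so treat the outputs as indicative):

import pandas as pd

idx = pd.Index(['a', 'b', 'b', 'c'])  # deliberately non-unique

indexer, missing = idx.get_indexer_non_unique(pd.Index(['b', 'x']))
print(indexer)  # [ 1  2 -1]  -- both positions of 'b', then -1 for 'x'
print(missing)  # [1]         -- position 1 of the target ('x') was not found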