diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 2aea2572f142b..98c81680aa3c1 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -357,6 +357,39 @@ New Behavior: df.groupby('c', sort=False).nth(1) +.. _whatsnew_0181.numpy_compatibility: + +Compatibility between pandas array-like methods (e.g. ``sum`` and ``take``) and their ``numpy`` +counterparts has been greatly increased by augmenting the signatures of the ``pandas`` methods so +as to accept arguments that can be passed in from ``numpy``, even if they are not necessarily +used in the ``pandas`` implementation (:issue:`12644`). Issues that were addressed were: + +- ``.searchsorted()`` for ``Index`` and ``TimedeltaIndex`` now accept a ``sorter`` argument to maintain compatibility with numpy's ``searchsorted`` function (:issue:`12238`) +- Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`) + +An example of this signature augmentation is illustrated below: + +Previous behaviour: + +.. code-block:: ipython + + In [1]: sp = pd.SparseDataFrame([1, 2, 3]) + In [2]: np.cumsum(sp, axis=0) + ... + TypeError: cumsum() takes at most 2 arguments (4 given) + +New behaviour: + +.. code-block:: ipython + + In [1]: sp = pd.SparseDataFrame([1, 2, 3]) + In [2]: np.cumsum(sp, axis=0) + Out[2]: + 0 + 0 1.0 + 1 3.0 + 2 6.0 + .. 
_whatsnew_0181.apply_resample: Using ``.apply`` on groupby resampling @@ -527,7 +560,6 @@ Bug Fixes - Bug in ``.resample(...)`` with a ``PeriodIndex`` casting to a ``DatetimeIndex`` when empty (:issue:`12868`) - Bug in ``.resample(...)`` with a ``PeriodIndex`` when resampling to an existing frequency (:issue:`12770`) - Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`) -- Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`) - Bug in ``Series`` construction with ``Categorical`` and ``dtype='category'`` is specified (:issue:`12574`) - Bugs in concatenation with a coercable dtype was too aggressive, resulting in different dtypes in outputformatting when an object was longer than ``display.max_rows`` (:issue:`12411`, :issue:`12045`, :issue:`11594`, :issue:`10571`, :issue:`12211`) - Bug in ``float_format`` option with option not being validated as a callable. (:issue:`12706`) @@ -547,6 +579,7 @@ Bug Fixes - Segfault in ``to_json`` when attempting to serialise a ``DataFrame`` or ``Series`` with non-ndarray values (:issue:`10778`). 
- Bug in ``.align`` not returning the sub-class (:issue:`12983`) - Bug in aligning a ``Series`` with a ``DataFrame`` (:issue:`13037`) +- Bug in ``ABCPanel`` in which ``Panel4D`` was not being considered as a valid instance of this generic type (:issue:`12810`) - Bug in consistency of ``.name`` on ``.groupby(..).apply(..)`` cases (:issue:`12363`) diff --git a/pandas/__init__.py b/pandas/__init__.py index 56de3ac5bb974..c26785d87bba0 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -19,7 +19,7 @@ # numpy compat -from pandas.compat.numpy_compat import * +from pandas.compat.numpy import * try: from pandas import hashtable, tslib, lib diff --git a/pandas/compat/numpy_compat.py b/pandas/compat/numpy/__init__.py similarity index 100% rename from pandas/compat/numpy_compat.py rename to pandas/compat/numpy/__init__.py diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py new file mode 100644 index 0000000000000..069cb3638fe75 --- /dev/null +++ b/pandas/compat/numpy/function.py @@ -0,0 +1,247 @@ +""" +For compatibility with numpy libraries, pandas functions or +methods have to accept '*args' and '**kwargs' parameters to +accommodate numpy arguments that are not actually used or +respected in the pandas implementation. + +To ensure that users do not abuse these parameters, validation +is performed in 'validators.py' to make sure that any extra +parameters passed correspond ONLY to those in the numpy signature. +Part of that validation includes whether or not the user attempted +to pass in non-default values for these extraneous parameters. As we +want to discourage users from relying on these parameters when calling +the pandas implementation, we want them only to pass in the default values +for these parameters. + +This module provides a set of commonly used default arguments for functions +and methods that are spread throughout the codebase. 
This module will make it +easier to adjust to future upstream changes in the analogous numpy signatures. +""" + +from numpy import ndarray +from pandas.util.validators import (validate_args, validate_kwargs, + validate_args_and_kwargs) +from pandas.core.common import is_integer +from pandas.compat import OrderedDict + + +class CompatValidator(object): + def __init__(self, defaults, fname=None, method=None, + max_fname_arg_count=None): + self.fname = fname + self.method = method + self.defaults = defaults + self.max_fname_arg_count = max_fname_arg_count + + def __call__(self, args, kwargs, fname=None, + max_fname_arg_count=None, method=None): + fname = self.fname if fname is None else fname + max_fname_arg_count = (self.max_fname_arg_count if + max_fname_arg_count is None + else max_fname_arg_count) + method = self.method if method is None else method + + if method == 'args': + validate_args(fname, args, max_fname_arg_count, self.defaults) + elif method == 'kwargs': + validate_kwargs(fname, kwargs, self.defaults) + elif method == 'both': + validate_args_and_kwargs(fname, args, kwargs, + max_fname_arg_count, + self.defaults) + else: + raise ValueError("invalid validation method " + "'{method}'".format(method=method)) + +ARGMINMAX_DEFAULTS = dict(out=None) +validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin', + method='both', max_fname_arg_count=1) +validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax', + method='both', max_fname_arg_count=1) + + +def process_skipna(skipna, args): + if isinstance(skipna, ndarray) or skipna is None: + args = (skipna,) + args + skipna = True + + return skipna, args + + +def validate_argmin_with_skipna(skipna, args, kwargs): + """ + If 'Series.argmin' is called via the 'numpy' library, + the third parameter in its signature is 'out', which + takes either an ndarray or 'None', so check if the + 'skipna' parameter is either an instance of ndarray or + is None, since 'skipna' itself should be a boolean + """ 
+ + skipna, args = process_skipna(skipna, args) + validate_argmin(args, kwargs) + return skipna + + +def validate_argmax_with_skipna(skipna, args, kwargs): + """ + If 'Series.argmax' is called via the 'numpy' library, + the third parameter in its signature is 'out', which + takes either an ndarray or 'None', so check if the + 'skipna' parameter is either an instance of ndarray or + is None, since 'skipna' itself should be a boolean + """ + + skipna, args = process_skipna(skipna, args) + validate_argmax(args, kwargs) + return skipna + +ARGSORT_DEFAULTS = OrderedDict() +ARGSORT_DEFAULTS['axis'] = -1 +ARGSORT_DEFAULTS['kind'] = 'quicksort' +ARGSORT_DEFAULTS['order'] = None +validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort', + max_fname_arg_count=0, method='both') + + +def validate_argsort_with_ascending(ascending, args, kwargs): + """ + If 'Categorical.argsort' is called via the 'numpy' library, the + first parameter in its signature is 'axis', which takes either + an integer or 'None', so check if the 'ascending' parameter has + either integer type or is None, since 'ascending' itself should + be a boolean + """ + + if is_integer(ascending) or ascending is None: + args = (ascending,) + args + ascending = True + + validate_argsort(args, kwargs, max_fname_arg_count=1) + return ascending + +CLIP_DEFAULTS = dict(out=None) +validate_clip = CompatValidator(CLIP_DEFAULTS, fname='clip', + method='both', max_fname_arg_count=3) + + +def validate_clip_with_axis(axis, args, kwargs): + """ + If 'NDFrame.clip' is called via the numpy library, the third + parameter in its signature is 'out', which can take an ndarray, + so check if the 'axis' parameter is an instance of ndarray, since + 'axis' itself should either be an integer or None + """ + + if isinstance(axis, ndarray): + args = (axis,) + args + axis = None + + validate_clip(args, kwargs) + return axis + +COMPRESS_DEFAULTS = OrderedDict() +COMPRESS_DEFAULTS['axis'] = None +COMPRESS_DEFAULTS['out'] = None 
+validate_compress = CompatValidator(COMPRESS_DEFAULTS, fname='compress', + method='both', max_fname_arg_count=1) + +CUM_FUNC_DEFAULTS = OrderedDict() +CUM_FUNC_DEFAULTS['dtype'] = None +CUM_FUNC_DEFAULTS['out'] = None +validate_cum_func = CompatValidator(CUM_FUNC_DEFAULTS, method='kwargs') +validate_cumsum = CompatValidator(CUM_FUNC_DEFAULTS, fname='cumsum', + method='both', max_fname_arg_count=1) + +LOGICAL_FUNC_DEFAULTS = dict(out=None) +validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs') + +MINMAX_DEFAULTS = dict(out=None) +validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min', + method='both', max_fname_arg_count=1) +validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max', + method='both', max_fname_arg_count=1) + +RESHAPE_DEFAULTS = dict(order='C') +validate_reshape = CompatValidator(RESHAPE_DEFAULTS, fname='reshape', + method='both', max_fname_arg_count=1) + +REPEAT_DEFAULTS = dict(axis=None) +validate_repeat = CompatValidator(REPEAT_DEFAULTS, fname='repeat', + method='both', max_fname_arg_count=1) + +ROUND_DEFAULTS = dict(out=None) +validate_round = CompatValidator(ROUND_DEFAULTS, fname='round', + method='both', max_fname_arg_count=1) + +SORT_DEFAULTS = OrderedDict() +SORT_DEFAULTS['axis'] = -1 +SORT_DEFAULTS['kind'] = 'quicksort' +SORT_DEFAULTS['order'] = None +validate_sort = CompatValidator(SORT_DEFAULTS, fname='sort', + method='kwargs') + +STAT_FUNC_DEFAULTS = OrderedDict() +STAT_FUNC_DEFAULTS['dtype'] = None +STAT_FUNC_DEFAULTS['out'] = None +validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, + method='kwargs') +validate_sum = CompatValidator(STAT_FUNC_DEFAULTS, fname='sum', + method='both', max_fname_arg_count=1) +validate_mean = CompatValidator(STAT_FUNC_DEFAULTS, fname='mean', + method='both', max_fname_arg_count=1) + +STAT_DDOF_FUNC_DEFAULTS = OrderedDict() +STAT_DDOF_FUNC_DEFAULTS['dtype'] = None +STAT_DDOF_FUNC_DEFAULTS['out'] = None +validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, + 
method='kwargs') + +# Currently, numpy (v1.11) has backwards compatibility checks +# in place so that this 'kwargs' parameter is technically +# unnecessary, but in the long-run, this will be needed. +SQUEEZE_DEFAULTS = dict(axis=None) +validate_squeeze = CompatValidator(SQUEEZE_DEFAULTS, fname='squeeze', + method='kwargs') + +TAKE_DEFAULTS = OrderedDict() +TAKE_DEFAULTS['out'] = None +TAKE_DEFAULTS['mode'] = 'raise' +validate_take = CompatValidator(TAKE_DEFAULTS, fname='take', + method='kwargs') + + +def validate_take_with_convert(convert, args, kwargs): + """ + If this function is called via the 'numpy' library, the third + parameter in its signature is 'out', which takes either an + ndarray or 'None', so check if the 'convert' parameter is either + an instance of ndarray or is None + """ + + if isinstance(convert, ndarray) or convert is None: + args = (convert,) + args + convert = True + + validate_take(args, kwargs, max_fname_arg_count=3, method='both') + return convert + +TRANSPOSE_DEFAULTS = dict(axes=None) +validate_transpose = CompatValidator(TRANSPOSE_DEFAULTS, fname='transpose', + method='both', max_fname_arg_count=0) + + +def validate_transpose_for_generic(inst, kwargs): + try: + validate_transpose(tuple(), kwargs) + except ValueError as e: + klass = type(inst).__name__ + msg = str(e) + + # the Panel class actually relies on the 'axes' parameter if called + # via the 'numpy' library, so let's make sure the error is specific + # about saying that the parameter is not supported for particular + # implementations of 'transpose' + if "the 'axes' parameter is not supported" in msg: + msg += " for {klass} instances".format(klass=klass) + + raise ValueError(msg) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 3059c39c2cb82..7ed9e7ff90bd8 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -3,7 +3,6 @@ # flake8: noqa import sys -import numpy as np import pandas import copy import pickle as pkl diff 
--git a/pandas/core/base.py b/pandas/core/base.py index 0d2b450f53e89..1a812ba2e4878 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -7,6 +7,7 @@ from pandas.core import common as com import pandas.core.nanops as nanops import pandas.lib as lib +from pandas.compat.numpy import function as nv from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError @@ -798,8 +799,9 @@ class IndexOpsMixin(object): # ndarray compatibility __array_priority__ = 1000 - def transpose(self): + def transpose(self, *args, **kwargs): """ return the transpose, which is by definition self """ + nv.validate_transpose(args, kwargs) return self T = property(transpose, doc="return the transpose, which is by " diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 47ad2111607c0..4f80c610c1126 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -12,6 +12,7 @@ NoNewAttributesMixin, _shared_docs) import pandas.core.common as com from pandas.core.missing import interpolate_2d +from pandas.compat.numpy import function as nv from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) @@ -356,8 +357,13 @@ def itemsize(self): """ return the size of a single category """ return self.categories.itemsize - def reshape(self, new_shape, **kwargs): - """ compat with .reshape """ + def reshape(self, new_shape, *args, **kwargs): + """ + An ndarray-compatible method that returns + `self` because categorical instances cannot + actually be reshaped. + """ + nv.validate_reshape(args, kwargs) return self @property @@ -1087,6 +1093,13 @@ def notnull(self): """ return ~self.isnull() + def put(self, *args, **kwargs): + """ + Replace specific elements in the Categorical with given values. + """ + raise NotImplementedError(("'put' is not yet implemented " + "for Categorical")) + def dropna(self): """ Return the Categorical without null values. 
@@ -1164,17 +1177,27 @@ def check_for_ordered(self, op): "you can use .as_ordered() to change the " "Categorical to an ordered one\n".format(op=op)) - def argsort(self, ascending=True, **kwargs): - """ Implements ndarray.argsort. - - For internal compatibility with numpy arrays. + def argsort(self, ascending=True, *args, **kwargs): + """ + Returns the indices that would sort the Categorical instance if + 'sort_values' was called. This function is implemented to provide + compatibility with numpy ndarray objects. - Only ordered Categoricals can be argsorted! + While an ordering is applied to the category values, arg-sorting + in this context refers more to organizing and grouping together + based on matching category values. Thus, this function can be + called on an unordered Categorical instance unlike the functions + 'Categorical.min' and 'Categorical.max'. Returns ------- argsorted : numpy array + + See also + -------- + numpy.ndarray.argsort """ + ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) result = np.argsort(self._codes.copy(), **kwargs) if not ascending: result = result[::-1] @@ -1297,7 +1320,7 @@ def order(self, inplace=False, ascending=True, na_position='last'): return self.sort_values(inplace=inplace, ascending=ascending, na_position=na_position) - def sort(self, inplace=True, ascending=True, na_position='last'): + def sort(self, inplace=True, ascending=True, na_position='last', **kwargs): """ DEPRECATED: use :meth:`Categorical.sort_values`. 
That function is just like this one, except that a new Categorical is returned @@ -1310,6 +1333,7 @@ def sort(self, inplace=True, ascending=True, na_position='last'): """ warn("sort is deprecated, use sort_values(...)", FutureWarning, stacklevel=2) + nv.validate_sort(tuple(), kwargs) return self.sort_values(inplace=inplace, ascending=ascending, na_position=na_position) @@ -1792,7 +1816,7 @@ def describe(self): return result - def repeat(self, repeats): + def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a Categorical. @@ -1801,6 +1825,7 @@ def repeat(self, repeats): numpy.ndarray.repeat """ + nv.validate_repeat(args, kwargs) codes = self._codes.repeat(repeats) return Categorical(values=codes, categories=self.categories, ordered=self.ordered, fastpath=True) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1ec5b05aa7eef..b209b6d6ec543 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -44,9 +44,9 @@ from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat +from pandas.compat.numpy import function as nv from pandas.util.decorators import (deprecate, Appender, Substitution, deprecate_kwarg) -from pandas.util.validators import validate_args from pandas.tseries.period import PeriodIndex from pandas.tseries.index import DatetimeIndex @@ -1770,9 +1770,10 @@ def memory_usage(self, index=True, deep=False): index=['Index']).append(result) return result - def transpose(self): + def transpose(self, *args, **kwargs): """Transpose index and columns""" - return super(DataFrame, self).transpose(1, 0) + nv.validate_transpose(args, dict()) + return super(DataFrame, self).transpose(1, 0, **kwargs) T = property(transpose) @@ -3174,7 +3175,7 @@ def trans(v): return self._constructor(new_data).__finalize__(self) def sort(self, columns=None, axis=0, ascending=True, inplace=False, - kind='quicksort', na_position='last'): + kind='quicksort', na_position='last', 
**kwargs): """ DEPRECATED: use :meth:`DataFrame.sort_values` @@ -3209,6 +3210,7 @@ def sort(self, columns=None, axis=0, ascending=True, inplace=False, ------- sorted : DataFrame """ + nv.validate_sort(tuple(), kwargs) if columns is None: warnings.warn("sort(....) is deprecated, use sort_index(.....)", @@ -4434,7 +4436,7 @@ def merge(self, right, how='inner', on=None, left_on=None, right_on=None, right_index=right_index, sort=sort, suffixes=suffixes, copy=copy, indicator=indicator) - def round(self, decimals=0, *args): + def round(self, decimals=0, *args, **kwargs): """ Round a DataFrame to a variable number of decimal places. @@ -4502,8 +4504,7 @@ def _series_round(s, decimals): return s.round(decimals) return s - validate_args(args, min_length=0, max_length=1, - msg="Inplace rounding is not supported") + nv.validate_round(args, kwargs) if isinstance(decimals, (dict, Series)): if isinstance(decimals, Series): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4a87e348fa759..b1b38d659b55c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -21,6 +21,7 @@ import pandas.core.datetools as datetools from pandas.formats.printing import pprint_thing from pandas import compat +from pandas.compat.numpy import function as nv from pandas.compat import (map, zip, lrange, string_types, isidentifier, set_function_name) from pandas.core.common import (isnull, notnull, is_list_like, @@ -30,7 +31,6 @@ AbstractMethodError) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg -from pandas.util.validators import validate_kwargs from pandas.core import config # goal is to be able to define the docs close to function, while still being @@ -469,10 +469,7 @@ def transpose(self, *args, **kwargs): if kwargs.pop('copy', None) or (len(args) and args[-1]): new_values = new_values.copy() - if kwargs: - raise TypeError('transpose() got an unexpected keyword ' - 'argument 
"{0}"'.format(list(kwargs.keys())[0])) - + nv.validate_transpose_for_generic(self, kwargs) return self._constructor(new_values, **new_axes).__finalize__(self) def swapaxes(self, axis1, axis2, copy=True): @@ -514,8 +511,10 @@ def pop(self, item): return result - def squeeze(self): + def squeeze(self, **kwargs): """Squeeze length 1 dimensions.""" + nv.validate_squeeze(tuple(), kwargs) + try: return self.iloc[tuple([0 if len(a) == 1 else slice(None) for a in self.axes])] @@ -1612,7 +1611,7 @@ def __delitem__(self, key): except KeyError: pass - def take(self, indices, axis=0, convert=True, is_copy=True): + def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): """ Analogous to ndarray.take @@ -1627,7 +1626,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True): ------- taken : type of caller """ - + nv.validate_take(tuple(), kwargs) self._consolidate_inplace() new_data = self._data.take(indices, axis=self._get_block_manager_axis(axis), @@ -3604,7 +3603,7 @@ def notnull(self): """ return notnull(self).__finalize__(self) - def clip(self, lower=None, upper=None, out=None, axis=None): + def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): """ Trim values at input threshold(s). 
@@ -3650,8 +3649,10 @@ def clip(self, lower=None, upper=None, out=None, axis=None): 3 0.230930 0.000000 4 1.100000 0.570967 """ - if out is not None: # pragma: no cover - raise Exception('out argument is not supported yet') + if isinstance(self, com.ABCPanel): + raise NotImplementedError("clip is not supported yet for panels") + + axis = nv.validate_clip_with_axis(axis, args, kwargs) # GH 2747 (arguments were reversed) if lower is not None and upper is not None: @@ -5291,7 +5292,7 @@ def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f): @Appender(_num_doc) def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): - validate_kwargs(name, kwargs, 'out', 'dtype') + nv.validate_stat_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True if axis is None: @@ -5311,7 +5312,7 @@ def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): @Appender(_num_ddof_doc) def stat_func(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs): - validate_kwargs(name, kwargs, 'out', 'dtype') + nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True if axis is None: @@ -5332,7 +5333,7 @@ def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func, @Appender("Return cumulative {0} over requested axis.".format(name) + _cnum_doc) def cum_func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): - validate_kwargs(name, kwargs, 'out', 'dtype') + nv.validate_cum_func(tuple(), kwargs, fname=name) if axis is None: axis = self._stat_axis_number else: @@ -5366,7 +5367,7 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f): @Appender(_bool_doc) def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, **kwargs): - validate_kwargs(name, kwargs, 'out', 'dtype') + nv.validate_logical_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True if axis is None: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 
dd4697c2eac7f..7a4791189726e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -11,7 +11,7 @@ callable, map ) from pandas import compat -from pandas.compat.numpy_compat import _np_version_under1p8 +from pandas.compat.numpy import _np_version_under1p8 from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) from pandas.core.categorical import Categorical @@ -359,7 +359,7 @@ def __init__(self, obj, keys=None, axis=0, level=None, self.exclusions = set(exclusions) if exclusions else set() # we accept no other args - validate_kwargs('group', kwargs) + validate_kwargs('group', kwargs, {}) def __len__(self): return len(self.groups) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index bced97b0fde47..63fea71895da2 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1257,7 +1257,11 @@ def na_op(x, y): return result @Appender('Wrapper for comparison method %s' % name) - def f(self, other): + def f(self, other, axis=None): + # Validate the axis parameter + if axis is not None: + axis = self._get_axis_number(axis) + if isinstance(other, self._constructor): return self._compare_constructor(other, na_op) elif isinstance(other, (self._constructor_sliced, pd.DataFrame, diff --git a/pandas/core/panel.py b/pandas/core/panel.py index ea88c9f7223a9..7d0bedcc2b381 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -15,6 +15,7 @@ from pandas import compat from pandas import lib from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) +from pandas.compat.numpy import function as nv from pandas.core.categorical import Categorical from pandas.core.common import (PandasError, _try_sort, _default_index, _infer_dtype_from_scalar, is_list_like) @@ -629,7 +630,7 @@ def head(self, n=5): def tail(self, n=5): raise NotImplementedError - def round(self, decimals=0): + def round(self, decimals=0, *args, **kwargs): """ Round each value in Panel to a specified number of decimal places. 
@@ -650,6 +651,8 @@ def round(self, decimals=0): -------- numpy.around """ + nv.validate_round(args, kwargs) + if com.is_integer(decimals): result = np.apply_along_axis(np.round, 0, self.values) return self._wrap_result(result, axis=0) @@ -1212,7 +1215,21 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, @Appender(_shared_docs['transpose'] % _shared_doc_kwargs) def transpose(self, *args, **kwargs): - return super(Panel, self).transpose(*args, **kwargs) + # check if a list of axes was passed in instead as a + # single *args element + if (len(args) == 1 and hasattr(args[0], '__iter__') and + not com.is_string_like(args[0])): + axes = args[0] + else: + axes = args + + if 'axes' in kwargs and axes: + raise TypeError("transpose() got multiple values for " + "keyword argument 'axes'") + elif not axes: + axes = kwargs.pop('axes', ()) + + return super(Panel, self).transpose(*axes, **kwargs) @Appender(_shared_docs['fillna'] % _shared_doc_kwargs) def fillna(self, value=None, method=None, axis=None, inplace=False, diff --git a/pandas/core/series.py b/pandas/core/series.py index f31903f92cd63..2af3dd26c0a90 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -24,7 +24,7 @@ _maybe_match_name, ABCSparseArray, _coerce_to_dtype, SettingWithCopyError, _maybe_box_datetimelike, ABCDataFrame, - _dict_compat) + _dict_compat, is_integer) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices @@ -39,9 +39,8 @@ from pandas.tseries.period import PeriodIndex, Period from pandas import compat from pandas.util.terminal import get_terminal_size -from pandas.util.validators import validate_args from pandas.compat import zip, u, OrderedDict, StringIO - +from pandas.compat.numpy import function as nv import pandas.core.ops as ops import pandas.core.algorithms as algos @@ -393,7 +392,7 @@ def ravel(self, order='C'): """ return 
self._values.ravel(order=order) - def compress(self, condition, axis=0, out=None, **kwargs): + def compress(self, condition, *args, **kwargs): """ Return selected slices of an array along given axis as a Series @@ -401,6 +400,7 @@ def compress(self, condition, axis=0, out=None, **kwargs): -------- numpy.ndarray.compress """ + nv.validate_compress(args, kwargs) return self[condition] def nonzero(self): @@ -431,7 +431,8 @@ def nonzero(self): def put(self, *args, **kwargs): """ - return a ndarray with the values put + Applies the `put` method to its `values` attribute + if it has one. See also -------- @@ -703,7 +704,7 @@ def setitem(key, value): raise except (KeyError, ValueError): values = self._values - if (com.is_integer(key) and + if (is_integer(key) and not self.index.inferred_type == 'integer'): values[key] = value @@ -812,14 +813,16 @@ def _set_values(self, key, value): self._data = self._data.setitem(indexer=key, value=value) self._maybe_update_cacher() - def repeat(self, reps): + def repeat(self, reps, *args, **kwargs): """ - return a new Series with the values repeated reps times + Repeat elements of an Series. Refer to `numpy.ndarray.repeat` + for more information about the `reps` argument. See also -------- numpy.ndarray.repeat """ + nv.validate_repeat(args, kwargs) new_index = self.index.repeat(reps) new_values = self._values.repeat(reps) return self._constructor(new_values, @@ -827,13 +830,13 @@ def repeat(self, reps): def reshape(self, *args, **kwargs): """ - return an ndarray with the values shape - if the specified shape matches exactly the current shape, then - return self (for compat) + Return the values attribute of `self` with shape `args`. + However, if the specified shape matches exactly the current + shape, `self` is returned for compatibility reasons. 
See also -------- - numpy.ndarray.take + numpy.ndarray.reshape """ if len(args) == 1 and hasattr(args[0], '__iter__'): shape = args[0] @@ -842,6 +845,7 @@ def reshape(self, *args, **kwargs): if tuple(shape) == self.shape: # XXX ignoring the "order" keyword. + nv.validate_reshape(tuple(), kwargs) return self return self._values.reshape(shape, **kwargs) @@ -1216,7 +1220,7 @@ def drop_duplicates(self, keep='first', inplace=False): def duplicated(self, keep='first'): return super(Series, self).duplicated(keep=keep) - def idxmin(self, axis=None, out=None, skipna=True): + def idxmin(self, axis=None, skipna=True, *args, **kwargs): """ Index of first occurrence of minimum of values. @@ -1238,12 +1242,13 @@ def idxmin(self, axis=None, out=None, skipna=True): DataFrame.idxmin numpy.ndarray.argmin """ + skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) i = nanops.nanargmin(_values_from_object(self), skipna=skipna) if i == -1: return np.nan return self.index[i] - def idxmax(self, axis=None, out=None, skipna=True): + def idxmax(self, axis=None, skipna=True, *args, **kwargs): """ Index of first occurrence of maximum of values. @@ -1265,6 +1270,7 @@ def idxmax(self, axis=None, out=None, skipna=True): DataFrame.idxmax numpy.ndarray.argmax """ + skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) i = nanops.nanargmax(_values_from_object(self), skipna=skipna) if i == -1: return np.nan @@ -1274,7 +1280,7 @@ def idxmax(self, axis=None, out=None, skipna=True): argmin = idxmin argmax = idxmax - def round(self, decimals=0, *args): + def round(self, decimals=0, *args, **kwargs): """ Round each value in a Series to the given number of decimals. 
@@ -1295,9 +1301,7 @@ def round(self, decimals=0, *args): DataFrame.round """ - validate_args(args, min_length=0, max_length=1, - msg="Inplace rounding is not supported") - + nv.validate_round(args, kwargs) result = _values_from_object(self).round(decimals) result = self._constructor(result, index=self.index).__finalize__(self) @@ -2329,7 +2333,7 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - def take(self, indices, axis=0, convert=True, is_copy=False): + def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): """ return Series corresponding to requested indices @@ -2346,6 +2350,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False): -------- numpy.ndarray.take """ + nv.validate_take(tuple(), kwargs) + # check/convert indicies here if convert: indices = maybe_convert_indices(indices, len(self._get_axis(axis))) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 01d825a4ca596..50c86c8bd6e1c 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -10,6 +10,7 @@ from pandas.lib import Timestamp, Timedelta, is_datetime_array from pandas.compat import range, u +from pandas.compat.numpy import function as nv from pandas import compat from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin) @@ -452,14 +453,16 @@ def tolist(self): """ return list(self.values) - def repeat(self, n): + def repeat(self, n, *args, **kwargs): """ - return a new Index of the values repeated n times + Repeat elements of an Index. Refer to `numpy.ndarray.repeat` + for more information about the `n` argument. 
See also -------- numpy.ndarray.repeat """ + nv.validate_repeat(args, kwargs) return self._shallow_copy(self._values.repeat(n)) def ravel(self, order='C'): @@ -1354,8 +1357,10 @@ def _ensure_compat_concat(indexes): numpy.ndarray.take """ - @Appender(_index_shared_docs['take'] % _index_doc_kwargs) - def take(self, indices, axis=0, allow_fill=True, fill_value=None): + @Appender(_index_shared_docs['take']) + def take(self, indices, axis=0, allow_fill=True, + fill_value=None, **kwargs): + nv.validate_take(tuple(), kwargs) indices = com._ensure_platform_int(indices) if self._can_hold_na: taken = self._assert_take_fillable(self.values, indices, @@ -1619,7 +1624,12 @@ def shift(self, periods=1, freq=None): def argsort(self, *args, **kwargs): """ - return an ndarray indexer of the underlying data + Returns the indices that would sort the index and its + underlying data. + + Returns + ------- + argsorted : numpy array See also -------- diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 98cb028aefae8..8f343c5de5fb6 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -3,6 +3,7 @@ import pandas.index as _index from pandas import compat +from pandas.compat.numpy import function as nv from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg) from pandas.core.config import get_option @@ -460,7 +461,9 @@ def _convert_list_indexer(self, keyarr, kind=None): return None @Appender(_index_shared_docs['take']) - def take(self, indices, axis=0, allow_fill=True, fill_value=None): + def take(self, indices, axis=0, allow_fill=True, + fill_value=None, **kwargs): + nv.validate_take(tuple(), kwargs) indices = com._ensure_platform_int(indices) taken = self._assert_take_fillable(self.codes, indices, allow_fill=allow_fill, diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index dd58bb30bf7b7..3effc9b1315e6 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -11,6 +11,7 @@ from pandas.lib import 
Timestamp from pandas.compat import range, zip, lrange, lzip, map +from pandas.compat.numpy import function as nv from pandas import compat from pandas.core.base import FrozenList import pandas.core.base as base @@ -769,7 +770,7 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): levels = self.levels labels = [np.repeat(x, n_repeat) for x in self.labels] # Assumes that each label is divisible by n_shuffle - labels = [x.reshape(n_shuffle, -1).ravel('F') for x in labels] + labels = [x.reshape(n_shuffle, -1).ravel(order='F') for x in labels] names = self.names return MultiIndex(levels=levels, labels=labels, names=names) @@ -1007,7 +1008,9 @@ def __getitem__(self, key): verify_integrity=False) @Appender(_index_shared_docs['take']) - def take(self, indices, axis=0, allow_fill=True, fill_value=None): + def take(self, indices, axis=0, allow_fill=True, + fill_value=None, **kwargs): + nv.validate_take(tuple(), kwargs) indices = com._ensure_platform_int(indices) taken = self._assert_take_fillable(self.labels, indices, allow_fill=allow_fill, @@ -1074,7 +1077,8 @@ def append(self, other): def argsort(self, *args, **kwargs): return self.values.argsort(*args, **kwargs) - def repeat(self, n): + def repeat(self, n, *args, **kwargs): + nv.validate_repeat(args, kwargs) return MultiIndex(levels=self.levels, labels=[label.view(np.ndarray).repeat(n) for label in self.labels], names=self.names, diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index dbee753af855c..168143fdea047 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -6,6 +6,7 @@ from pandas import compat from pandas.compat import lrange, range +from pandas.compat.numpy import function as nv from pandas.indexes.base import Index, _index_shared_docs from pandas.util.decorators import Appender, cache_readonly import pandas.core.common as com @@ -244,12 +245,19 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): def argsort(self, *args, **kwargs): """ - return an ndarray indexer of 
the underlying data + Returns the indices that would sort the index and its + underlying data. + + Returns + ------- + argsorted : numpy array See also -------- numpy.ndarray.argsort """ + nv.validate_argsort(args, kwargs) + if self._step > 0: return np.arange(len(self)) else: diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 8dd6c93249221..95fd2d52db009 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -10,7 +10,7 @@ from pandas.util.testing import assert_frame_equal import pandas.io.date_converters as conv import pandas.util.testing as tm -from pandas.compat.numpy_compat import np_array_datetime64_compat +from pandas.compat.numpy import np_array_datetime64_compat class TestConverters(tm.TestCase): diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 15e7d51106bdb..5cb681f4d2e7d 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -15,7 +15,7 @@ from pandas.core.frame import DataFrame import pandas.io.gbq as gbq import pandas.util.testing as tm -from pandas.compat.numpy_compat import np_datetime64_compat +from pandas.compat.numpy import np_datetime64_compat PROJECT_ID = None PRIVATE_KEY_JSON_PATH = None diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 793a0c237f4a9..e114bee87ca27 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -13,6 +13,7 @@ from pandas import compat, lib from pandas.compat import range +from pandas.compat.numpy import function as nv from pandas._sparse import SparseIndex, BlockIndex, IntIndex import pandas._sparse as splib @@ -318,9 +319,15 @@ def _get_val_at(self, loc): @Appender(_index_shared_docs['take'] % _sparray_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, - fill_value=None): + fill_value=None, **kwargs): + """ + Sparse-compatible version of ndarray.take - # Sparse-compatible version of ndarray.take, returns SparseArray + Returns + 
------- + taken : ndarray + """ + nv.validate_take(tuple(), kwargs) if axis: raise ValueError("axis must be 0, input was {0}".format(axis)) @@ -455,7 +462,7 @@ def fillna(self, value, downcast=None): return self._simple_new(new_values, self.sp_index, fill_value=self.fill_value) - def sum(self, axis=None, dtype=None, out=None): + def sum(self, axis=0, *args, **kwargs): """ Sum of non-NA/null values @@ -463,6 +470,7 @@ def sum(self, axis=None, dtype=None, out=None): ------- sum : float """ + nv.validate_sum(args, kwargs) valid_vals = self._valid_sp_values sp_sum = valid_vals.sum() if self._null_fill_value: @@ -471,23 +479,25 @@ def sum(self, axis=None, dtype=None, out=None): nsparse = self.sp_index.ngaps return sp_sum + self.fill_value * nsparse - def cumsum(self, axis=0, dtype=None, out=None): + def cumsum(self, axis=0, *args, **kwargs): """ Cumulative sum of values. Preserves locations of NaN values - Extra parameters are to preserve ndarray interface. - Returns ------- cumsum : Series """ + nv.validate_cumsum(args, kwargs) + + # TODO: gh-12855 - return a SparseArray here if com.notnull(self.fill_value): return self.to_dense().cumsum() + # TODO: what if sp_values contains NaN?? 
return SparseArray(self.sp_values.cumsum(), sparse_index=self.sp_index, fill_value=self.fill_value) - def mean(self, axis=None, dtype=None, out=None): + def mean(self, axis=0, *args, **kwargs): """ Mean of non-NA/null values @@ -495,6 +505,7 @@ def mean(self, axis=None, dtype=None, out=None): ------- mean : float """ + nv.validate_mean(args, kwargs) valid_vals = self._valid_sp_values sp_sum = valid_vals.sum() ct = len(valid_vals) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index dc18eaa0f9bb7..2e2a2c3e8846c 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -10,6 +10,7 @@ from pandas import compat import numpy as np +from pandas.compat.numpy import function as nv from pandas.core.common import isnull, _try_sort from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.series import Series @@ -636,10 +637,11 @@ def rrenamer(x): return this, other - def transpose(self): + def transpose(self, *args, **kwargs): """ Returns a DataFrame with the rows/columns switched. """ + nv.validate_transpose(args, kwargs) return SparseDataFrame( self.values.T, index=self.columns, columns=self.index, default_fill_value=self._default_fill_value, @@ -651,7 +653,7 @@ def transpose(self): def count(self, axis=0, **kwds): return self.apply(lambda x: x.count(), axis=axis) - def cumsum(self, axis=0): + def cumsum(self, axis=0, *args, **kwargs): """ Return SparseDataFrame of cumulative sums over requested axis. 
@@ -664,6 +666,7 @@ def cumsum(self, axis=0): ------- y : SparseDataFrame """ + nv.validate_cumsum(args, kwargs) return self.apply(lambda x: x.cumsum(), axis=axis) def apply(self, func, axis=0, broadcast=False, reduce=False): diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 032b0f18b6482..a783a7c596955 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -10,6 +10,7 @@ import warnings import operator +from pandas.compat.numpy import function as nv from pandas.core.common import isnull, _values_from_object, _maybe_match_name from pandas.core.index import Index, _ensure_index from pandas.core.series import Series @@ -598,7 +599,7 @@ def sparse_reindex(self, new_index): sparse_index=new_index, fill_value=self.fill_value).__finalize__(self) - def take(self, indices, axis=0, convert=True): + def take(self, indices, axis=0, convert=True, *args, **kwargs): """ Sparse-compatible version of ndarray.take @@ -606,24 +607,28 @@ def take(self, indices, axis=0, convert=True): ------- taken : ndarray """ + convert = nv.validate_take_with_convert(convert, args, kwargs) new_values = SparseArray.take(self.values, indices) new_index = self.index.take(indices) return self._constructor(new_values, index=new_index).__finalize__(self) - def cumsum(self, axis=0, dtype=None, out=None): + def cumsum(self, axis=0, *args, **kwargs): """ Cumulative sum of values. 
Preserves locations of NaN values Returns ------- - cumsum : Series or SparseSeries + cumsum : SparseSeries if `self` has a null `fill_value` and a + generic Series otherwise """ + nv.validate_cumsum(args, kwargs) new_array = SparseArray.cumsum(self.values) if isinstance(new_array, SparseArray): return self._constructor( new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self) + # TODO: gh-12855 - return a SparseSeries here return Series(new_array, index=self.index).__finalize__(self) def dropna(self, axis=0, inplace=False, **kwargs): diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index b45cdc038a70d..345715ee0528d 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -14,7 +14,6 @@ class TestSparseArray(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): @@ -143,6 +142,19 @@ def test_bad_take(self): assertRaisesRegexp(IndexError, "bounds", lambda: self.arr.take(11)) self.assertRaises(IndexError, lambda: self.arr.take(-11)) + def test_take_invalid_kwargs(self): + msg = "take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, self.arr.take, + [2, 3], foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, self.arr.take, + [2, 3], out=self.arr) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, self.arr.take, + [2, 3], mode='clip') + def test_take_filling(self): # similar tests as GH 12631 sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4]) @@ -702,6 +714,107 @@ def test_float_array_comparison(self): self._check_comparison_ops(a, b, values, rvalues) +class TestSparseArrayAnalytics(tm.TestCase): + def test_sum(self): + data = np.arange(10).astype(float) + out = SparseArray(data).sum() + self.assertEqual(out, 45.0) + + data[5] = np.nan + out = SparseArray(data, fill_value=2).sum() + self.assertEqual(out, 40.0) + + out = SparseArray(data, 
fill_value=np.nan).sum() + self.assertEqual(out, 40.0) + + def test_numpy_sum(self): + data = np.arange(10).astype(float) + out = np.sum(SparseArray(data)) + self.assertEqual(out, 45.0) + + data[5] = np.nan + out = np.sum(SparseArray(data, fill_value=2)) + self.assertEqual(out, 40.0) + + out = np.sum(SparseArray(data, fill_value=np.nan)) + self.assertEqual(out, 40.0) + + msg = "the 'dtype' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.sum, + SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.sum, + SparseArray(data), out=out) + + def test_cumsum(self): + data = np.arange(10).astype(float) + out = SparseArray(data).cumsum() + expected = SparseArray(data.cumsum()) + tm.assert_sp_array_equal(out, expected) + + # TODO: gh-12855 - return a SparseArray here + data[5] = np.nan + out = SparseArray(data, fill_value=2).cumsum() + self.assertNotIsInstance(out, SparseArray) + tm.assert_numpy_array_equal(out, data.cumsum()) + + out = SparseArray(data, fill_value=np.nan).cumsum() + expected = SparseArray(np.array([ + 0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40])) + tm.assert_sp_array_equal(out, expected) + + def test_numpy_cumsum(self): + data = np.arange(10).astype(float) + out = np.cumsum(SparseArray(data)) + expected = SparseArray(data.cumsum()) + tm.assert_sp_array_equal(out, expected) + + # TODO: gh-12855 - return a SparseArray here + data[5] = np.nan + out = np.cumsum(SparseArray(data, fill_value=2)) + self.assertNotIsInstance(out, SparseArray) + tm.assert_numpy_array_equal(out, data.cumsum()) + + out = np.cumsum(SparseArray(data, fill_value=np.nan)) + expected = SparseArray(np.array([ + 0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40])) + tm.assert_sp_array_equal(out, expected) + + msg = "the 'dtype' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.cumsum, + SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + 
tm.assertRaisesRegexp(ValueError, msg, np.cumsum, + SparseArray(data), out=out) + + def test_mean(self): + data = np.arange(10).astype(float) + out = SparseArray(data).mean() + self.assertEqual(out, 4.5) + + data[5] = np.nan + out = SparseArray(data).mean() + self.assertEqual(out, 40.0 / 9) + + def test_numpy_mean(self): + data = np.arange(10).astype(float) + out = np.mean(SparseArray(data)) + self.assertEqual(out, 4.5) + + data[5] = np.nan + out = np.mean(SparseArray(data)) + self.assertEqual(out, 40.0 / 9) + + msg = "the 'dtype' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.mean, + SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.mean, + SparseArray(data), out=out) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index c179823a67a30..07b97affa62e9 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -778,18 +778,20 @@ def test_count(self): # win32 don't check dtype tm.assert_series_equal(result, dense_result, check_dtype=False) - def test_cumsum(self): - result = self.frame.cumsum() - expected = self.frame.to_dense().cumsum() - tm.assertIsInstance(result, SparseDataFrame) - tm.assert_frame_equal(result.to_dense(), expected) - def _check_all(self, check_func): check_func(self.frame, self.orig) check_func(self.iframe, self.iorig) check_func(self.zframe, self.zorig) check_func(self.fill_frame, self.fill_orig) + def test_numpy_transpose(self): + sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a']) + result = np.transpose(np.transpose(sdf)) + tm.assert_sp_frame_equal(result, sdf) + + msg = "the 'axes' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.transpose, sdf, axes=1) + def test_combine_first(self): df = self.frame @@ -848,6 +850,35 @@ def 
test_nan_columnname(self): self.assertTrue(np.isnan(nan_colname_sparse.columns[0])) +class TestSparseDataFrameAnalytics(tm.TestCase): + def setUp(self): + self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C': np.arange(10), + 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + + self.dates = bdate_range('1/1/2011', periods=10) + + self.frame = SparseDataFrame(self.data, index=self.dates) + + def test_cumsum(self): + result = self.frame.cumsum() + expected = SparseDataFrame(self.frame.to_dense().cumsum()) + tm.assert_sp_frame_equal(result, expected) + + def test_numpy_cumsum(self): + result = np.cumsum(self.frame, axis=0) + expected = SparseDataFrame(self.frame.to_dense().cumsum()) + tm.assert_sp_frame_equal(result, expected) + + msg = "the 'dtype' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.cumsum, + self.frame, dtype=np.int64) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.cumsum, + self.frame, out=result) + if __name__ == '__main__': import nose # noqa nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index 4c6c61cea25a9..9a53f50c6432e 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -476,6 +476,21 @@ def _compare(idx): exp = pd.Series(np.repeat(nan, 5)) tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp) + def test_numpy_take(self): + sp = SparseSeries([1.0, 2.0, 3.0]) + indices = [1, 2] + + tm.assert_series_equal(np.take(sp, indices, axis=0).to_dense(), + np.take(sp.to_dense(), indices, axis=0)) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.take, + sp, indices, out=np.empty(sp.shape)) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.take, + sp, indices, mode='clip') + def test_setitem(self): 
self.bseries[5] = 7. self.assertEqual(self.bseries[5], 7.) @@ -858,18 +873,6 @@ def test_shift_dtype_fill_value(self): tm.assert_sp_series_equal(sparse.shift(-4), orig.shift(-4).to_sparse(fill_value=0)) - def test_cumsum(self): - result = self.bseries.cumsum() - expected = self.bseries.to_dense().cumsum() - tm.assertIsInstance(result, SparseSeries) - self.assertEqual(result.name, self.bseries.name) - tm.assert_series_equal(result.to_dense(), expected) - - result = self.zbseries.cumsum() - expected = self.zbseries.to_dense().cumsum() - tm.assertIsInstance(result, Series) - tm.assert_series_equal(result, expected) - def test_combine_first(self): s = self.bseries @@ -1216,6 +1219,46 @@ def _dense_series_compare(s, f): tm.assert_series_equal(result.to_dense(), dense_result) +class TestSparseSeriesAnalytics(tm.TestCase): + def setUp(self): + arr, index = _test_data1() + self.bseries = SparseSeries(arr, index=index, kind='block', + name='bseries') + + arr, index = _test_data1_zero() + self.zbseries = SparseSeries(arr, index=index, kind='block', + fill_value=0, name='zbseries') + + def test_cumsum(self): + result = self.bseries.cumsum() + expected = SparseSeries(self.bseries.to_dense().cumsum()) + tm.assert_sp_series_equal(result, expected) + + # TODO: gh-12855 - return a SparseSeries here + result = self.zbseries.cumsum() + expected = self.zbseries.to_dense().cumsum() + self.assertNotIsInstance(result, SparseSeries) + tm.assert_series_equal(result, expected) + + def test_numpy_cumsum(self): + result = np.cumsum(self.bseries) + expected = SparseSeries(self.bseries.to_dense().cumsum()) + tm.assert_sp_series_equal(result, expected) + + # TODO: gh-12855 - return a SparseSeries here + result = np.cumsum(self.zbseries) + expected = self.zbseries.to_dense().cumsum() + self.assertNotIsInstance(result, SparseSeries) + tm.assert_series_equal(result, expected) + + msg = "the 'dtype' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.cumsum, + self.bseries, 
dtype=np.int64) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.cumsum, + self.zbseries, out=result) + if __name__ == '__main__': import nose # noqa nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index dbb461a5c9e15..20aaae586f14f 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -738,6 +738,26 @@ def test_sem(self): self.assertFalse((result < 0).any()) nanops._USE_BOTTLENECK = True + def test_sort_invalid_kwargs(self): + df = DataFrame([1, 2, 3], columns=['a']) + + msg = "sort\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, df.sort, foo=2) + + # Neither of these should raise an error because they + # are explicit keyword arguments in the signature and + # hence should not be swallowed by the kwargs parameter + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df.sort(axis=1) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df.sort(kind='mergesort') + + msg = "the 'order' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, df.sort, order=2) + def test_skew(self): tm._skip_if_no_scipy() from scipy.stats import skew @@ -1903,7 +1923,7 @@ def test_numpy_round(self): expected = DataFrame([[2., 1.], [0., 7.]]) assert_frame_equal(out, expected) - msg = "Inplace rounding is not supported" + msg = "the 'out' parameter is not supported" with tm.assertRaisesRegexp(ValueError, msg): np.round(df, decimals=0, out=df) @@ -2070,3 +2090,7 @@ def test_dot(self): df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) assertRaisesRegexp(ValueError, 'aligned', df.dot, df2) + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/frame/test_operators.py 
b/pandas/tests/frame/test_operators.py index 3c4054b247e0e..cd2a0fbeefae3 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -972,7 +972,7 @@ def test_boolean_comparison(self): assert_numpy_array_equal(result, expected.values) self.assertRaises(ValueError, lambda: df == b_c) - self.assertFalse((df.values == b_c)) + self.assertFalse(np.array_equal(df.values, b_c)) # with alignment df = DataFrame(np.arange(6).reshape((3, 2)), @@ -1170,3 +1170,7 @@ def test_inplace_ops_identity(self): assert_frame_equal(df, expected) assert_frame_equal(df2, expected) self.assertIs(df._data, df2._data) + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index a6aaa69183f10..088e391d0a1c1 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -268,7 +268,7 @@ def test_compat(self): def test_argsort(self): for k, ind in self.indices.items(): - # sep teststed + # separately tested if k in ['catIndex']: continue @@ -276,6 +276,32 @@ def test_argsort(self): expected = np.array(ind).argsort() tm.assert_numpy_array_equal(result, expected) + def test_numpy_argsort(self): + for k, ind in self.indices.items(): + result = np.argsort(ind) + expected = ind.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas/indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if type(ind) in (CategoricalIndex, RangeIndex): + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, + np.argsort, ind, axis=1) + + msg = "the 'kind' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.argsort, 
+ ind, kind='mergesort') + + msg = "the 'order' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.argsort, + ind, order=('a', 'b')) + def test_pickle(self): for ind in self.indices.values(): self.verify_pickle(ind) @@ -300,6 +326,43 @@ def test_take(self): with tm.assertRaises(AttributeError): ind.freq + def test_take_invalid_kwargs(self): + idx = self.create_index() + indices = [1, 2] + + msg = "take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + + def test_repeat(self): + rep = 2 + i = self.create_index() + expected = pd.Index(i.values.repeat(rep), name=i.name) + tm.assert_index_equal(i.repeat(rep), expected) + + i = self.create_index() + rep = np.arange(len(i)) + expected = pd.Index(i.values.repeat(rep), name=i.name) + tm.assert_index_equal(i.repeat(rep), expected) + + def test_numpy_repeat(self): + rep = 2 + i = self.create_index() + expected = i.repeat(rep) + tm.assert_index_equal(np.repeat(i, rep), expected) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.repeat, + i, rep, axis=0) + def test_setops_errorcases(self): for name, idx in compat.iteritems(self.indices): # # non-iterable input diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index da7084eff9fa3..d1ac4ff003509 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -21,7 +21,7 @@ CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex) from pandas.util.testing import assert_almost_equal -from pandas.compat.numpy_compat import np_datetime64_compat +from pandas.compat.numpy import np_datetime64_compat import pandas.core.config as cf diff --git 
a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index fa8f6a291c677..1d8a52e48e468 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -832,3 +832,19 @@ def test_take_fill_value_datetime(self): with tm.assertRaises(IndexError): idx.take(np.array([1, -5])) + + def test_take_invalid_kwargs(self): + idx = pd.CategoricalIndex([1, 2, 3], name='foo') + indices = [1, 0, -1] + + msg = "take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c585fb1b1b21f..46180a823c002 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -78,6 +78,31 @@ def test_labels_dtypes(self): self.assertTrue((i.labels[0] >= 0).all()) self.assertTrue((i.labels[1] >= 0).all()) + def test_repeat(self): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + def test_numpy_repeat(self): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.repeat, m, reps, axis=1) + def test_set_name_methods(self): # so long as these are synonyms, we don't need to test set_names 
self.assertEqual(self.index.rename, self.index.set_names) @@ -472,7 +497,7 @@ def test_constructor_mismatched_label_levels(self): self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] def assert_multiindex_copied(self, copy, original): - # levels shoudl be (at least, shallow copied) + # levels should be (at least, shallow copied) assert_copy(copy.levels, original.levels) assert_almost_equal(copy.labels, original.labels) @@ -1595,6 +1620,24 @@ def test_take_fill_value(self): with tm.assertRaises(IndexError): idx.take(np.array([1, -5])) + def take_invalid_kwargs(self): + vals = [['A', 'B'], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] + idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) + indices = [1, 2] + + msg = "take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + def test_join_level(self): def _check_how(other, how): join_index, lidx, ridx = other.join(self.index, how=how, diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index fabc9306c3601..031385de7825d 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -12,7 +12,7 @@ import pandas as pd from pandas import (Series, DataFrame, isnull, notnull, bdate_range, - date_range) + date_range, _np_version_under1p10) from pandas.core.index import MultiIndex from pandas.tseries.index import Timestamp from pandas.tseries.tdi import Timedelta @@ -500,13 +500,35 @@ def _check_accum_op(self, name): self.assert_numpy_array_equal(result, expected) + def test_compress(self): + cond = [True, False, True, False, False] + s = Series([1, -1, 5, 8, 7], + index=list('abcde'), name='foo') + expected = 
Series(s.values.compress(cond), + index=list('ac'), name='foo') + tm.assert_series_equal(s.compress(cond), expected) + + def test_numpy_compress(self): + cond = [True, False, True, False, False] + s = Series([1, -1, 5, 8, 7], + index=list('abcde'), name='foo') + expected = Series(s.values.compress(cond), + index=list('ac'), name='foo') + tm.assert_series_equal(np.compress(cond, s), expected) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.compress, + cond, s, axis=1) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.compress, + cond, s, out=s) + def test_round(self): - # numpy.round doesn't preserve metadata, probably a numpy bug, - # re: GH #314 self.ts.index.name = "index_name" result = self.ts.round(2) - expected = Series(np.round(self.ts.values, 2), index=self.ts.index, - name='ts') + expected = Series(np.round(self.ts.values, 2), + index=self.ts.index, name='ts') assert_series_equal(result, expected) self.assertEqual(result.name, self.ts.name) @@ -517,7 +539,7 @@ def test_numpy_round(self): expected = Series([2., 1., 0.]) assert_series_equal(out, expected) - msg = "Inplace rounding is not supported" + msg = "the 'out' parameter is not supported" with tm.assertRaisesRegexp(ValueError, msg): np.round(s, decimals=0, out=s) @@ -1198,6 +1220,17 @@ def test_idxmin(self): result = s.idxmin() self.assertEqual(result, 1) + def test_numpy_argmin(self): + # argmin is aliased to idxmin + data = np.random.randint(0, 11, size=10) + result = np.argmin(Series(data)) + self.assertEqual(result, np.argmin(data)) + + if not _np_version_under1p10: + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.argmin, + Series(data), out=data) + def test_idxmax(self): # test idxmax # _check_stat_op approach can not be used here because of isnull check. 
@@ -1242,6 +1275,17 @@ def test_idxmax(self): result = s.idxmin() self.assertEqual(result, 1.1) + def test_numpy_argmax(self): + # argmax is aliased to idxmax + data = np.random.randint(0, 11, size=10) + result = np.argmax(Series(data)) + self.assertEqual(result, np.argmax(data)) + + if not _np_version_under1p10: + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.argmax, + Series(data), out=data) + def test_ptp(self): N = 1000 arr = np.random.randn(N) @@ -1295,6 +1339,15 @@ def test_repeat(self): index=s.index.values.repeat(to_rep)) assert_series_equal(reps, exp) + def test_numpy_repeat(self): + s = Series(np.arange(3), name='x') + expected = Series(s.values.repeat(2), name='x', + index=s.index.values.repeat(2)) + assert_series_equal(np.repeat(s, 2), expected) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.repeat, s, 2, axis=0) + def test_searchsorted_numeric_dtypes_scalar(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted(30) @@ -1621,7 +1674,7 @@ def test_reshape_non_2d(self): result = a.reshape(2, 2) expected = a.values.reshape(2, 2) tm.assert_numpy_array_equal(result, expected) - self.assertTrue(type(result) is type(expected)) + self.assertIsInstance(result, type(expected)) def test_reshape_2d_return_array(self): x = Series(np.random.random(201), name='x') @@ -1635,6 +1688,26 @@ def test_reshape_2d_return_array(self): expected = x.reshape((-1, 1)) assert_almost_equal(result, expected) + def test_reshape_bad_kwarg(self): + a = Series([1, 2, 3, 4]) + + msg = "'foo' is an invalid keyword argument for this function" + tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2) + + msg = "reshape\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2) + + def test_numpy_reshape(self): + a = Series([1, 2, 3, 4]) + + result = np.reshape(a, (2, 2)) + expected = a.values.reshape(2, 2) + 
tm.assert_numpy_array_equal(result, expected) + self.assertIsInstance(result, type(expected)) + + result = np.reshape(a, a.shape) + tm.assert_series_equal(result, a) + def test_unstack(self): from numpy import nan diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 68864306525dc..151ded48dac0b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -11,7 +11,7 @@ import pandas.core.algorithms as algos import pandas.util.testing as tm import pandas.hashtable as hashtable -from pandas.compat.numpy_compat import np_array_datetime64_compat +from pandas.compat.numpy import np_array_datetime64_compat class TestMatch(tm.TestCase): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 1c5774a7e7e2e..b2f54bd8c41db 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -14,7 +14,7 @@ from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta) from pandas.compat import u, StringIO -from pandas.compat.numpy_compat import np_array_datetime64_compat +from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate, NoNewAttributesMixin) from pandas.tseries.base import DatetimeIndexOpsMixin @@ -993,6 +993,34 @@ def test_lookup_nan(self): self.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs))) +class TestTranspose(Ops): + errmsg = "the 'axes' parameter is not supported" + + def test_transpose(self): + for obj in self.objs: + if isinstance(obj, Index): + tm.assert_index_equal(obj.transpose(), obj) + else: + tm.assert_series_equal(obj.transpose(), obj) + + def test_transpose_non_default_axes(self): + for obj in self.objs: + tm.assertRaisesRegexp(ValueError, self.errmsg, + obj.transpose, 1) + tm.assertRaisesRegexp(ValueError, self.errmsg, + obj.transpose, axes=1) + + def test_numpy_transpose(self): + for obj in self.objs: + if isinstance(obj, Index): + tm.assert_index_equal(np.transpose(obj), obj) + 
else: + tm.assert_series_equal(np.transpose(obj), obj) + + tm.assertRaisesRegexp(ValueError, self.errmsg, + np.transpose, obj, axes=1) + + class TestNoNewAttributesMixin(tm.TestCase): def test_mixin(self): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index ceeb61c5c5508..33b7850732230 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -509,6 +509,35 @@ def f(): res = cat_rev > "b" self.assert_numpy_array_equal(res, exp) + def test_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(c.argsort( + ascending=True), expected) + + expected = expected[::-1] + tm.assert_numpy_array_equal(c.argsort( + ascending=False), expected) + + def test_numpy_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(np.argsort(c), expected) + + msg = "the 'kind' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.argsort, + c, kind='mergesort') + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.argsort, + c, axis=0) + + msg = "the 'order' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.argsort, + c, order='C') + def test_na_flags_int_categories(self): # #1457 @@ -3976,6 +4005,22 @@ def test_repeat(self): res = cat.repeat(2) self.assert_categorical_equal(res, exp) + def test_numpy_repeat(self): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + exp = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]) + self.assert_categorical_equal(np.repeat(cat, 2), exp) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.repeat, cat, 2, axis=1) + + def test_numpy_reshape(self): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + self.assert_categorical_equal(np.reshape(cat, cat.shape), cat) + + msg = "the 'order' 
parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.reshape, + cat, cat.shape, order='F') + def test_na_actions(self): cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 46678a72688aa..56838184a3670 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -546,14 +546,15 @@ def test_unexpected_keyword(self): # GH8597 def test_stat_unexpected_keyword(self): obj = self._construct(5) starwars = 'Star Wars' + errmsg = 'unexpected keyword' - with assertRaisesRegexp(TypeError, 'unexpected keyword'): + with assertRaisesRegexp(TypeError, errmsg): obj.max(epic=starwars) # stat_function - with assertRaisesRegexp(TypeError, 'unexpected keyword'): + with assertRaisesRegexp(TypeError, errmsg): obj.var(epic=starwars) # stat_function_ddof - with assertRaisesRegexp(TypeError, 'unexpected keyword'): + with assertRaisesRegexp(TypeError, errmsg): obj.sum(epic=starwars) # cum_function - with assertRaisesRegexp(TypeError, 'unexpected keyword'): + with assertRaisesRegexp(TypeError, errmsg): obj.any(epic=starwars) # logical_function def test_api_compat(self): @@ -568,6 +569,69 @@ def test_api_compat(self): if PY3: self.assertTrue(f.__qualname__.endswith(func)) + def test_stat_non_defaults_args(self): + obj = self._construct(5) + out = np.array([0]) + errmsg = "the 'out' parameter is not supported" + + with assertRaisesRegexp(ValueError, errmsg): + obj.max(out=out) # stat_function + with assertRaisesRegexp(ValueError, errmsg): + obj.var(out=out) # stat_function_ddof + with assertRaisesRegexp(ValueError, errmsg): + obj.sum(out=out) # cum_function + with assertRaisesRegexp(ValueError, errmsg): + obj.any(out=out) # logical_function + + def test_clip(self): + lower = 1 + upper = 3 + col = np.arange(5) + + obj = self._construct(len(col), value=col) + + if isinstance(obj, Panel): + msg = "clip is not supported yet for panels" + 
tm.assertRaisesRegexp(NotImplementedError, msg, + obj.clip, lower=lower, + upper=upper) + + else: + out = obj.clip(lower=lower, upper=upper) + expected = self._construct(len(col), value=col + .clip(lower, upper)) + self._compare(out, expected) + + bad_axis = 'foo' + msg = ('No axis named {axis} ' + 'for object').format(axis=bad_axis) + assertRaisesRegexp(ValueError, msg, obj.clip, + lower=lower, upper=upper, + axis=bad_axis) + + def test_numpy_clip(self): + lower = 1 + upper = 3 + col = np.arange(5) + + obj = self._construct(len(col), value=col) + + if isinstance(obj, Panel): + msg = "clip is not supported yet for panels" + tm.assertRaisesRegexp(NotImplementedError, msg, + np.clip, obj, + lower, upper) + else: + out = np.clip(obj, lower, upper) + expected = self._construct(len(col), value=col + .clip(lower, upper)) + self._compare(out, expected) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, + np.clip, obj, + lower, upper, out=col) + class TestSeries(tm.TestCase, Generic): _typ = Series @@ -2124,6 +2188,114 @@ def test_squeeze(self): [tm.assert_series_equal(empty_series, higher_dim.squeeze()) for higher_dim in [empty_series, empty_frame, empty_panel]] + def test_numpy_squeeze(self): + s = tm.makeFloatSeries() + tm.assert_series_equal(np.squeeze(s), s) + + df = tm.makeTimeDataFrame().reindex(columns=['A']) + tm.assert_series_equal(np.squeeze(df), df['A']) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, + np.squeeze, s, axis=0) + + def test_transpose(self): + msg = ("transpose\(\) got multiple values for " + "keyword argument 'axes'") + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + # calls implementation in pandas/core/base.py + tm.assert_series_equal(s.transpose(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.transpose().transpose(), df) + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.transpose(2, 0, 1) + 
.transpose(1, 2, 0), p) + tm.assertRaisesRegexp(TypeError, msg, p.transpose, + 2, 0, 1, axes=(2, 0, 1)) + for p4d in [tm.makePanel4D()]: + tm.assert_panel4d_equal(p4d.transpose(2, 0, 3, 1) + .transpose(1, 3, 0, 2), p4d) + tm.assertRaisesRegexp(TypeError, msg, p4d.transpose, + 2, 0, 3, 1, axes=(2, 0, 3, 1)) + + def test_numpy_transpose(self): + msg = "the 'axes' parameter is not supported" + + s = tm.makeFloatSeries() + tm.assert_series_equal( + np.transpose(s), s) + tm.assertRaisesRegexp(ValueError, msg, + np.transpose, s, axes=1) + + df = tm.makeTimeDataFrame() + tm.assert_frame_equal(np.transpose( + np.transpose(df)), df) + tm.assertRaisesRegexp(ValueError, msg, + np.transpose, df, axes=1) + + p = tm.makePanel() + tm.assert_panel_equal(np.transpose( + np.transpose(p, axes=(2, 0, 1)), + axes=(1, 2, 0)), p) + + p4d = tm.makePanel4D() + tm.assert_panel4d_equal(np.transpose( + np.transpose(p4d, axes=(2, 0, 3, 1)), + axes=(1, 3, 0, 2)), p4d) + + def test_take(self): + indices = [1, 5, -2, 6, 3, -1] + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + out = s.take(indices) + expected = Series(data=s.values.take(indices), + index=s.index.take(indices)) + tm.assert_series_equal(out, expected) + for df in [tm.makeTimeDataFrame()]: + out = df.take(indices) + expected = DataFrame(data=df.values.take(indices, axis=0), + index=df.index.take(indices), + columns=df.columns) + tm.assert_frame_equal(out, expected) + + indices = [-3, 2, 0, 1] + for p in [tm.makePanel()]: + out = p.take(indices) + expected = Panel(data=p.values.take(indices, axis=0), + items=p.items.take(indices), + major_axis=p.major_axis, + minor_axis=p.minor_axis) + tm.assert_panel_equal(out, expected) + for p4d in [tm.makePanel4D()]: + out = p4d.take(indices) + expected = Panel4D(data=p4d.values.take(indices, axis=0), + labels=p4d.labels.take(indices), + major_axis=p4d.major_axis, + minor_axis=p4d.minor_axis, + items=p4d.items) + tm.assert_panel4d_equal(out, expected) + + def 
test_take_invalid_kwargs(self): + indices = [-3, 2, 0, 1] + s = tm.makeFloatSeries() + df = tm.makeTimeDataFrame() + p = tm.makePanel() + p4d = tm.makePanel4D() + + for obj in (s, df, p, p4d): + msg = "take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, obj.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, obj.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, obj.take, + indices, mode='clip') + def test_equals(self): s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) s2 = s1.copy() diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index a6516614e9965..87401f272adbd 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -251,6 +251,12 @@ def test_get_axis_number(self): self.assertEqual(self.panel._get_axis_number('major'), 1) self.assertEqual(self.panel._get_axis_number('minor'), 2) + with tm.assertRaisesRegexp(ValueError, "No axis named foo"): + self.panel._get_axis_number('foo') + + with tm.assertRaisesRegexp(ValueError, "No axis named foo"): + self.panel.__ge__(self.panel, axis='foo') + def test_get_axis_name(self): self.assertEqual(self.panel._get_axis_name(0), 'items') self.assertEqual(self.panel._get_axis_name(1), 'major_axis') @@ -2016,6 +2022,25 @@ def test_round(self): result = p.round() self.assert_panel_equal(expected, result) + def test_numpy_round(self): + values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] + p = Panel(values, items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + expected = Panel(evalues, items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 
'B']) + result = np.round(p) + self.assert_panel_equal(expected, result) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) + def test_multiindex_get(self): ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)], names=['first', 'second']) diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index e87e9770b770a..d6baa720bac19 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- import nose +from collections import OrderedDict from pandas.util._move import move_into_mutable_buffer, BadMove from pandas.util.decorators import deprecate_kwarg -from pandas.util.validators import validate_args, validate_kwargs +from pandas.util.validators import (validate_args, validate_kwargs, + validate_args_and_kwargs) import pandas.util.testing as tm @@ -78,78 +80,219 @@ def test_rands_array(): class TestValidateArgs(tm.TestCase): + fname = 'func' - def test_bad_min_length(self): - msg = "'min_length' must be non-negative" + def test_bad_min_fname_arg_count(self): + msg = "'max_fname_arg_count' must be non-negative" with tm.assertRaisesRegexp(ValueError, msg): - validate_args((None,), min_length=-1, max_length=5) + validate_args(self.fname, (None,), -1, 'foo') - def test_bad_arg_length_no_max(self): - min_length = 5 - msg = "expected at least {min_length} arguments".format( - min_length=min_length) + def test_bad_arg_length_max_value_single(self): + args = (None, None) + compat_args = ('foo',) - with tm.assertRaisesRegexp(ValueError, msg): - validate_args((None,), min_length=min_length, max_length=None) + min_fname_arg_count = 0 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(args) + min_fname_arg_count + msg = ("{fname}\(\) takes at most {max_length} " + "argument \({actual_length} given\)" + .format(fname=self.fname, max_length=max_length, + actual_length=actual_length)) - def 
test_bad_arg_length_with_max(self): - min_length = 5 - max_length = 10 - msg = ("expected between {min_length} and {max_length}" - " arguments inclusive".format(min_length=min_length, - max_length=max_length)) + with tm.assertRaisesRegexp(TypeError, msg): + validate_args(self.fname, args, + min_fname_arg_count, + compat_args) + + def test_bad_arg_length_max_value_multiple(self): + args = (None, None) + compat_args = dict(foo=None) + + min_fname_arg_count = 2 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(args) + min_fname_arg_count + msg = ("{fname}\(\) takes at most {max_length} " + "arguments \({actual_length} given\)" + .format(fname=self.fname, max_length=max_length, + actual_length=actual_length)) - with tm.assertRaisesRegexp(ValueError, msg): - validate_args((None,), min_length=min_length, - max_length=max_length) + with tm.assertRaisesRegexp(TypeError, msg): + validate_args(self.fname, args, + min_fname_arg_count, + compat_args) - def test_bad_min_max_length(self): - msg = "'min_length' > 'max_length'" - with tm.assertRaisesRegexp(ValueError, msg): - validate_args((None,), min_length=5, max_length=2) + def test_not_all_defaults(self): + bad_arg = 'foo' + msg = ("the '{arg}' parameter is not supported " + "in the pandas implementation of {func}\(\)". 
+ format(arg=bad_arg, func=self.fname)) - def test_not_all_none(self): - msg = "All arguments must be None" - with tm.assertRaisesRegexp(ValueError, msg): - validate_args(('foo',), min_length=0, - max_length=1, msg=msg) + compat_args = OrderedDict() + compat_args['foo'] = 2 + compat_args['bar'] = -1 + compat_args['baz'] = 3 - with tm.assertRaisesRegexp(ValueError, msg): - validate_args(('foo', 'bar', 'baz'), min_length=2, - max_length=5, msg=msg) + arg_vals = (1, -1, 3) - with tm.assertRaisesRegexp(ValueError, msg): - validate_args((None, 'bar', None), min_length=2, - max_length=5, msg=msg) + for i in range(1, 3): + with tm.assertRaisesRegexp(ValueError, msg): + validate_args(self.fname, arg_vals[:i], 2, compat_args) def test_validation(self): # No exceptions should be thrown - validate_args((None,), min_length=0, max_length=1) - validate_args((None, None), min_length=1, max_length=5) + validate_args(self.fname, (None,), 2, dict(out=None)) + + compat_args = OrderedDict() + compat_args['axis'] = 1 + compat_args['out'] = None + + validate_args(self.fname, (1, None), 2, compat_args) class TestValidateKwargs(tm.TestCase): + fname = 'func' def test_bad_kwarg(self): goodarg = 'f' badarg = goodarg + 'o' + compat_args = OrderedDict() + compat_args[goodarg] = 'foo' + compat_args[badarg + 'o'] = 'bar' kwargs = {goodarg: 'foo', badarg: 'bar'} - compat_args = (goodarg, badarg + 'o') - fname = 'func' - msg = ("{fname}\(\) got an unexpected " "keyword argument '{arg}'".format( - fname=fname, arg=badarg)) + fname=self.fname, arg=badarg)) + + with tm.assertRaisesRegexp(TypeError, msg): + validate_kwargs(self.fname, kwargs, compat_args) + + def test_not_all_none(self): + bad_arg = 'foo' + msg = ("the '{arg}' parameter is not supported " + "in the pandas implementation of {func}\(\)". 
+ format(arg=bad_arg, func=self.fname)) + + compat_args = OrderedDict() + compat_args['foo'] = 1 + compat_args['bar'] = 's' + compat_args['baz'] = None + + kwarg_keys = ('foo', 'bar', 'baz') + kwarg_vals = (2, 's', None) + + for i in range(1, 3): + kwargs = dict(zip(kwarg_keys[:i], + kwarg_vals[:i])) + + with tm.assertRaisesRegexp(ValueError, msg): + validate_kwargs(self.fname, kwargs, compat_args) + + def test_validation(self): + # No exceptions should be thrown + compat_args = OrderedDict() + compat_args['f'] = None + compat_args['b'] = 1 + compat_args['ba'] = 's' + kwargs = dict(f=None, b=1) + validate_kwargs(self.fname, kwargs, compat_args) + + +class TestValidateKwargsAndArgs(tm.TestCase): + fname = 'func' + + def test_invalid_total_length_max_length_one(self): + compat_args = ('foo',) + kwargs = {'foo': 'FOO'} + args = ('FoO', 'BaZ') + + min_fname_arg_count = 0 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(kwargs) + len(args) + min_fname_arg_count + msg = ("{fname}\(\) takes at most {max_length} " + "argument \({actual_length} given\)" + .format(fname=self.fname, max_length=max_length, + actual_length=actual_length)) + + with tm.assertRaisesRegexp(TypeError, msg): + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, + compat_args) + + def test_invalid_total_length_max_length_multiple(self): + compat_args = ('foo', 'bar', 'baz') + kwargs = {'foo': 'FOO', 'bar': 'BAR'} + args = ('FoO', 'BaZ') + + min_fname_arg_count = 2 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(kwargs) + len(args) + min_fname_arg_count + msg = ("{fname}\(\) takes at most {max_length} " + "arguments \({actual_length} given\)" + .format(fname=self.fname, max_length=max_length, + actual_length=actual_length)) + + with tm.assertRaisesRegexp(TypeError, msg): + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, + compat_args) + + def test_no_args_with_kwargs(self): + bad_arg = 'bar' + 
min_fname_arg_count = 2 + + compat_args = OrderedDict() + compat_args['foo'] = -5 + compat_args[bad_arg] = 1 + + msg = ("the '{arg}' parameter is not supported " + "in the pandas implementation of {func}\(\)". + format(arg=bad_arg, func=self.fname)) + + args = () + kwargs = {'foo': -5, bad_arg: 2} + tm.assertRaisesRegexp(ValueError, msg, + validate_args_and_kwargs, + self.fname, args, kwargs, + min_fname_arg_count, compat_args) + + args = (-5, 2) + kwargs = {} + tm.assertRaisesRegexp(ValueError, msg, + validate_args_and_kwargs, + self.fname, args, kwargs, + min_fname_arg_count, compat_args) + + def test_duplicate_argument(self): + min_fname_arg_count = 2 + compat_args = OrderedDict() + compat_args['foo'] = None + compat_args['bar'] = None + compat_args['baz'] = None + kwargs = {'foo': None, 'bar': None} + args = (None,) # duplicate value for 'foo' + + msg = ("{fname}\(\) got multiple values for keyword " + "argument '{arg}'".format(fname=self.fname, arg='foo')) with tm.assertRaisesRegexp(TypeError, msg): - validate_kwargs(fname, kwargs, *compat_args) + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, + compat_args) def test_validation(self): # No exceptions should be thrown - compat_args = ('f', 'b', 'ba') - kwargs = {'f': 'foo', 'b': 'bar'} - validate_kwargs('func', kwargs, *compat_args) + compat_args = OrderedDict() + compat_args['foo'] = 1 + compat_args['bar'] = None + compat_args['baz'] = -2 + kwargs = {'baz': -2} + args = (1, None) + + min_fname_arg_count = 2 + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, + compat_args) class TestMove(tm.TestCase): diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 059c77d21b4df..185d806a64fe8 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -6,6 +6,8 @@ from datetime import datetime, timedelta from pandas import compat +from pandas.compat.numpy import function as nv + import numpy as np from pandas.core import common as com, algorithms 
from pandas.core.common import (is_integer, is_float, is_bool_dtype, @@ -88,7 +90,7 @@ def _round(self, freq, rounder): return result @Appender(_round_doc % "round") - def round(self, freq): + def round(self, freq, *args, **kwargs): return self._round(freq, np.round) @Appender(_round_doc % "floor") @@ -294,7 +296,9 @@ def sort_values(self, return_indexer=False, ascending=True): return self._simple_new(sorted_values, **attribs) @Appender(_index_shared_docs['take']) - def take(self, indices, axis=0, allow_fill=True, fill_value=None): + def take(self, indices, axis=0, allow_fill=True, + fill_value=None, **kwargs): + nv.validate_take(tuple(), kwargs) indices = com._ensure_int64(indices) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) @@ -373,14 +377,17 @@ def tolist(self): """ return list(self.asobject) - def min(self, axis=None): + def min(self, axis=None, *args, **kwargs): """ - return the minimum value of the Index + Return the minimum value of the Index or minimum along + an axis. See also -------- numpy.ndarray.min """ + nv.validate_min(args, kwargs) + try: i8 = self.asi8 @@ -397,14 +404,17 @@ def min(self, axis=None): except ValueError: return self._na_value - def argmin(self, axis=None): + def argmin(self, axis=None, *args, **kwargs): """ - return a ndarray of the minimum argument indexer + Returns the indices of the minimum values along an axis. + See `numpy.ndarray.argmin` for more information on the + `axis` parameter. See also -------- numpy.ndarray.argmin """ + nv.validate_argmin(args, kwargs) i8 = self.asi8 if self.hasnans: @@ -415,14 +425,17 @@ def argmin(self, axis=None): i8[mask] = np.iinfo('int64').max return i8.argmin() - def max(self, axis=None): + def max(self, axis=None, *args, **kwargs): """ - return the maximum value of the Index + Return the maximum value of the Index or maximum along + an axis. 
See also -------- numpy.ndarray.max """ + nv.validate_max(args, kwargs) + try: i8 = self.asi8 @@ -439,14 +452,17 @@ def max(self, axis=None): except ValueError: return self._na_value - def argmax(self, axis=None): + def argmax(self, axis=None, *args, **kwargs): """ - return a ndarray of the maximum argument indexer + Returns the indices of the maximum values along an axis. + See `numpy.ndarray.argmax` for more information on the + `axis` parameter. See also -------- numpy.ndarray.argmax """ + nv.validate_argmax(args, kwargs) i8 = self.asi8 if self.hasnans: @@ -688,10 +704,11 @@ def unique(self): return self._simple_new(result, name=self.name, freq=self.freq, tz=getattr(self, 'tz', None)) - def repeat(self, repeats, axis=None): + def repeat(self, repeats, *args, **kwargs): """ Analogous to ndarray.repeat """ + nv.validate_repeat(args, kwargs) return self._shallow_copy(self.values.repeat(repeats), freq=None) def summary(self, name=None): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index da04acf6446af..478b25568d471 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -20,6 +20,7 @@ _values_from_object, ABCSeries, is_integer, is_float, is_object_dtype) from pandas import compat +from pandas.compat.numpy import function as nv from pandas.util.decorators import Appender, cache_readonly, Substitution from pandas.lib import Timedelta import pandas.lib as lib @@ -891,14 +892,16 @@ def append(self, other): for x in to_concat] return Index(com._concat_compat(to_concat), name=name) - def repeat(self, n): + def repeat(self, n, *args, **kwargs): """ - Return a new Index of the values repeated n times. + Return a new Index of the values repeated `n` times. 
See also -------- numpy.ndarray.repeat """ + nv.validate_repeat(args, kwargs) + # overwrites method from DatetimeIndexOpsMixin return self._shallow_copy(self.values.repeat(n)) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index b166dd71b67ae..3b10cae1ddca2 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -2,9 +2,10 @@ from datetime import datetime, timedelta import numpy as np import pandas as pd -from pandas import (Series, Index, Int64Index, Timestamp, DatetimeIndex, - PeriodIndex, TimedeltaIndex, Timedelta, timedelta_range, - date_range, Float64Index) +from pandas import (Series, Index, Int64Index, Timestamp, Period, + DatetimeIndex, PeriodIndex, TimedeltaIndex, + Timedelta, timedelta_range, date_range, Float64Index, + _np_version_under1p10) import pandas.tslib as tslib import pandas.tseries.period as period @@ -85,10 +86,10 @@ def test_astype_str(self): def test_asobject_tolist(self): idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx') - expected_list = [pd.Timestamp('2013-01-31'), - pd.Timestamp('2013-02-28'), - pd.Timestamp('2013-03-31'), - pd.Timestamp('2013-04-30')] + expected_list = [Timestamp('2013-01-31'), + Timestamp('2013-02-28'), + Timestamp('2013-03-31'), + Timestamp('2013-04-30')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject self.assertTrue(isinstance(result, Index)) @@ -100,10 +101,10 @@ def test_asobject_tolist(self): idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx', tz='Asia/Tokyo') - expected_list = [pd.Timestamp('2013-01-31', tz='Asia/Tokyo'), - pd.Timestamp('2013-02-28', tz='Asia/Tokyo'), - pd.Timestamp('2013-03-31', tz='Asia/Tokyo'), - pd.Timestamp('2013-04-30', tz='Asia/Tokyo')] + expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), + Timestamp('2013-02-28', tz='Asia/Tokyo'), + Timestamp('2013-03-31', tz='Asia/Tokyo'), + Timestamp('2013-04-30', tz='Asia/Tokyo')] expected = 
pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject self.assertTrue(isinstance(result, Index)) @@ -114,9 +115,9 @@ def test_asobject_tolist(self): idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4)], name='idx') - expected_list = [pd.Timestamp('2013-01-01'), - pd.Timestamp('2013-01-02'), pd.NaT, - pd.Timestamp('2013-01-04')] + expected_list = [Timestamp('2013-01-01'), + Timestamp('2013-01-02'), pd.NaT, + Timestamp('2013-01-04')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.asobject self.assertTrue(isinstance(result, Index)) @@ -138,8 +139,8 @@ def test_minmax(self): self.assertFalse(idx2.is_monotonic) for idx in [idx1, idx2]: - self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz)) - self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz)) + self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz)) + self.assertEqual(idx.max(), Timestamp('2011-01-03', tz=tz)) self.assertEqual(idx.argmin(), 0) self.assertEqual(idx.argmax(), 2) @@ -154,6 +155,86 @@ def test_minmax(self): obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) self.assertTrue(pd.isnull(getattr(obj, op)())) + def test_numpy_minmax(self): + dr = pd.date_range(start='2016-01-15', end='2016-01-20') + self.assertEqual(np.min(dr), Timestamp( + '2016-01-15 00:00:00', offset='D')) + self.assertEqual(np.max(dr), Timestamp( + '2016-01-20 00:00:00', offset='D')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.max, dr, out=0) + + self.assertEqual(np.argmin(dr), 0) + self.assertEqual(np.argmax(dr), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, dr, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, dr, out=0) + + def test_round(self): + for tz in self.tz: + rng = pd.date_range(start='2016-01-01', 
periods=5, + freq='30Min', tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 01:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, offset='30T'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(rng.round(freq='H'), expected_rng) + self.assertEqual(elt.round(freq='H'), expected_elt) + + msg = "Could not evaluate foo" + tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='foo') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='foo') + + msg = " is a non-fixed frequency" + tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + + def test_repeat(self): + reps = 2 + + for tz in self.tz: + rng = pd.date_range(start='2016-01-01', periods=2, + freq='30Min', tz=tz) + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + ]) + + tm.assert_index_equal(rng.repeat(reps), expected_rng) + + def test_numpy_repeat(self): + reps = 2 + msg = "the 'axis' parameter is not supported" + + for tz in self.tz: + rng = pd.date_range(start='2016-01-01', periods=2, + freq='30Min', tz=tz) + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + ]) + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + tm.assertRaisesRegexp(ValueError, msg, np.repeat, + rng, reps, axis=1) + def test_representation(self): idx = [] @@ -345,10 +426,10 @@ def test_add_iadd(self): idx = 
DatetimeIndex(['2011-01-01', '2011-01-02']) msg = "cannot add a datelike to a DatetimeIndex" with tm.assertRaisesRegexp(TypeError, msg): - idx + pd.Timestamp('2011-01-01') + idx + Timestamp('2011-01-01') with tm.assertRaisesRegexp(TypeError, msg): - pd.Timestamp('2011-01-01') + idx + Timestamp('2011-01-01') + idx def test_sub_isub(self): for tz in self.tz: @@ -515,7 +596,7 @@ def test_getitem(self): for idx in [idx1, idx2]: result = idx[0] - self.assertEqual(result, pd.Timestamp('2011-01-01', tz=idx.tz)) + self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) result = idx[0:5] expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', @@ -563,7 +644,7 @@ def test_take(self): for idx in [idx1, idx2]: result = idx.take([0]) - self.assertEqual(result, pd.Timestamp('2011-01-01', tz=idx.tz)) + self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) result = idx.take([0, 1, 2]) expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', @@ -597,6 +678,22 @@ def test_take(self): self.assert_index_equal(result, expected) self.assertIsNone(result.freq) + def test_take_invalid_kwargs(self): + idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = "take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + def test_infer_freq(self): # GH 11018 for freq in ['A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', @@ -699,6 +796,49 @@ def test_minmax(self): obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) self.assertTrue(pd.isnull(getattr(obj, op)())) + def test_numpy_minmax(self): + dr = pd.date_range(start='2016-01-15', end='2016-01-20') + td = TimedeltaIndex(np.asarray(dr)) + + 
self.assertEqual(np.min(td), Timedelta('16815 days')) + self.assertEqual(np.max(td), Timedelta('16820 days')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) + + self.assertEqual(np.argmin(td), 0) + self.assertEqual(np.argmax(td), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) + + def test_round(self): + td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') + elt = td[1] + + expected_rng = TimedeltaIndex([ + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 01:00:00'), + Timedelta('16801 days 02:00:00'), + Timedelta('16801 days 02:00:00'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq='H'), expected_rng) + self.assertEqual(elt.round(freq='H'), expected_elt) + + msg = "Could not evaluate foo" + tm.assertRaisesRegexp(ValueError, msg, td.round, freq='foo') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='foo') + + msg = " is a non-fixed frequency" + tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + def test_representation(self): idx1 = TimedeltaIndex([], freq='D') idx2 = TimedeltaIndex(['1 days'], freq='D') @@ -838,7 +978,7 @@ def test_sub_isub(self): idx = TimedeltaIndex(['1 day', '2 day']) msg = "cannot subtract a datelike from a TimedeltaIndex" with tm.assertRaisesRegexp(TypeError, msg): - idx - pd.Timestamp('2011-01-01') + idx - Timestamp('2011-01-01') result = Timestamp('2011-01-01') + idx expected = DatetimeIndex(['2011-01-02', '2011-01-03']) @@ -1287,6 +1427,22 @@ def test_take(self): self.assert_index_equal(result, expected) self.assertIsNone(result.freq) + def test_take_invalid_kwargs(self): + 
idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = "take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + def test_infer_freq(self): # GH 11018 for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' @@ -1418,6 +1574,24 @@ def test_minmax(self): self.assertEqual(result.ordinal, tslib.iNaT) self.assertEqual(result.freq, 'M') + def test_numpy_minmax(self): + pr = pd.period_range(start='2016-01-15', end='2016-01-20') + + self.assertEqual(np.min(pr), Period('2016-01-15', freq='D')) + self.assertEqual(np.max(pr), Period('2016-01-20', freq='D')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, pr, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.max, pr, out=0) + + self.assertEqual(np.argmin(pr), 0) + self.assertEqual(np.argmax(pr), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, pr, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, pr, out=0) + def test_representation(self): # GH 7601 idx1 = PeriodIndex([], freq='D') @@ -2162,3 +2336,10 @@ def test_shift(self): exp = pd.PeriodIndex(['2011-01-01 07:00', '2011-01-01 08:00' '2011-01-01 09:00'], name='xxx', freq='H') tm.assert_index_equal(idx.shift(-3), exp) + + +if __name__ == '__main__': + import nose + + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index c50e3fa7b5174..f2c20f7d3111d 100644 --- a/pandas/tseries/tests/test_converter.py +++ 
b/pandas/tseries/tests/test_converter.py @@ -8,7 +8,7 @@ from pandas.compat import u import pandas.util.testing as tm from pandas.tseries.offsets import Second, Milli, Micro -from pandas.compat.numpy_compat import np_datetime64_compat +from pandas.compat.numpy import np_datetime64_compat try: import pandas.tseries.converter as converter diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index fe025d2249add..0e91e396965fa 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -8,7 +8,7 @@ import numpy as np -from pandas.compat.numpy_compat import np_datetime64_compat +from pandas.compat.numpy import np_datetime64_compat from pandas.core.datetools import (bday, BDay, CDay, BQuarterEnd, BMonthEnd, BusinessHour, CustomBusinessHour, CBMonthEnd, CBMonthBegin, diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 78f84aa243cd9..12ba0b1b1bd9b 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -23,13 +23,13 @@ import numpy as np from numpy.random import randn from pandas.compat import range, lrange, lmap, zip, text_type, PY3, iteritems -from pandas.compat.numpy_compat import np_datetime64_compat +from pandas.compat.numpy import np_datetime64_compat from pandas import (Series, DataFrame, _np_version_under1p9, _np_version_under1p12) from pandas import tslib -from pandas.util.testing import (assert_series_equal, assert_almost_equal, - assertRaisesRegexp) +from pandas.util.testing import (assert_index_equal, assert_series_equal, + assert_almost_equal, assertRaisesRegexp) import pandas.util.testing as tm @@ -2289,6 +2289,28 @@ def test_constructor(self): vals = np.array(vals) self.assertRaises(ValueError, PeriodIndex, vals) + def test_repeat(self): + index = period_range('20010101', periods=2) + expected = PeriodIndex([ + Period('2001-01-01'), Period('2001-01-01'), + Period('2001-01-02'), Period('2001-01-02'), + ]) + + 
assert_index_equal(index.repeat(2), expected) + + def test_numpy_repeat(self): + index = period_range('20010101', periods=2) + expected = PeriodIndex([ + Period('2001-01-01'), Period('2001-01-01'), + Period('2001-01-02'), Period('2001-01-02'), + ]) + + assert_index_equal(np.repeat(index, 2), expected) + + msg = "the 'axis' parameter is not supported" + assertRaisesRegexp(ValueError, msg, np.repeat, + index, 2, axis=1) + def test_shift(self): pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index d5accc2a65eb8..034c31b33bce8 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -26,7 +26,7 @@ DatetimeIndex, Int64Index, to_datetime, bdate_range, Float64Index, NaT, timedelta_range, Timedelta, _np_version_under1p8, concat) from pandas.compat import range, long, StringIO, lrange, lmap, zip, product -from pandas.compat.numpy_compat import np_datetime64_compat +from pandas.compat.numpy import np_datetime64_compat from pandas.core.common import PerformanceWarning from pandas.tslib import iNaT from pandas.util.testing import ( diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index e4f91b25777a3..b2311bf4d6661 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -19,8 +19,8 @@ import pandas.tseries.offsets as offsets import pandas.util.testing as tm import pandas.compat as compat -from pandas.compat.numpy_compat import (np_datetime64_compat, - np_array_datetime64_compat) +from pandas.compat.numpy import (np_datetime64_compat, + np_array_datetime64_compat) from pandas.util.testing import assert_series_equal, _skip_if_has_locale diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 8446cc097719f..56c0dc875f7bf 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -332,11 +332,19 @@ 
class Timestamp(_Timestamp): def round(self, freq): """ - return a new Timestamp rounded to this resolution + Round the Timestamp to the specified resolution + + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` Parameters ---------- freq : a freq string indicating the rounding resolution + + Raises + ------ + ValueError if the freq cannot be converted """ return self._round(freq, np.round) @@ -1391,7 +1399,7 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, try: ts = parse_datetime_string(ts, dayfirst=dayfirst, yearfirst=yearfirst) except Exception: - raise ValueError + raise ValueError("could not convert string to Timestamp") return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) @@ -2581,12 +2589,19 @@ class Timedelta(_Timedelta): def round(self, freq): """ - return a new Timedelta rounded to this resolution. + Round the Timedelta to the specified resolution + Returns + ------- + a new Timedelta rounded to the given resolution of `freq` Parameters ---------- freq : a freq string indicating the rounding resolution + + Raises + ------ + ValueError if the freq cannot be converted """ return self._round(freq, np.round) diff --git a/pandas/types/generic.py b/pandas/types/generic.py index af3f735f4932b..0d576eed43d45 100644 --- a/pandas/types/generic.py +++ b/pandas/types/generic.py @@ -39,7 +39,7 @@ def _check(cls, inst): ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", )) ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", )) -ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel", )) +ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel", "panel4d")) ABCSparseSeries = create_pandas_abc_type("ABCSparseSeries", "_subtyp", ('sparse_series', 'sparse_time_series')) diff --git a/pandas/util/validators.py b/pandas/util/validators.py index f308a04165d74..2166dc45db605 100644 --- a/pandas/util/validators.py +++ b/pandas/util/validators.py @@ -4,73 +4,124 @@ """ 
-def validate_args(args, min_length=0, max_length=None, msg=""): +def _check_arg_length(fname, args, max_fname_arg_count, compat_args): + """ + Checks whether 'args' has length of at most 'compat_args'. Raises + a TypeError if that is not the case, similar to in Python when a + function is called with too many arguments. + + """ + if max_fname_arg_count < 0: + raise ValueError("'max_fname_arg_count' must be non-negative") + + if len(args) > len(compat_args): + max_arg_count = len(compat_args) + max_fname_arg_count + actual_arg_count = len(args) + max_fname_arg_count + argument = 'argument' if max_arg_count == 1 else 'arguments' + + raise TypeError( + "{fname}() takes at most {max_arg} {argument} " + "({given_arg} given)".format( + fname=fname, max_arg=max_arg_count, + argument=argument, given_arg=actual_arg_count)) + + +def _check_for_default_values(fname, arg_val_dict, compat_args): + """ + Check that the keys in `arg_val_dict` are mapped to their + default values as specified in `compat_args`. + + Note that this function is to be called only when it has been + checked that arg_val_dict.keys() is a subset of compat_args + + """ + from pandas.core.common import is_bool + + for key in arg_val_dict: + # try checking equality directly with '==' operator, + # as comparison may have been overridden for the left + # hand object + try: + match = (arg_val_dict[key] == compat_args[key]) + + if not is_bool(match): + raise ValueError("'match' is not a boolean") + + # could not compare them directly, so try comparison + # using the 'is' operator + except: + match = (arg_val_dict[key] is compat_args[key]) + + if not match: + raise ValueError(("the '{arg}' parameter is not " + "supported in the pandas " + "implementation of {fname}()". + format(fname=fname, arg=key))) + + +def validate_args(fname, args, max_fname_arg_count, compat_args): """ Checks whether the length of the `*args` argument passed into a function - has at least `min_length` arguments.
If `max_length` is an integer, checks - whether `*args` has at most `max_length` arguments inclusive. Raises a - ValueError if any of the aforementioned conditions are False. + has at most `len(compat_args)` arguments and whether or not all of these + elements in `args` are set to their default values. + + fname: str + The name of the function being passed the `*args` parameter - Parameters - ---------- args: tuple The `*args` parameter passed into a function - min_length: int, optional - The minimum number of arguments that should be contained in the `args`. - tuple. This number must be non-negative. The default is '0'. + max_fname_arg_count: int + The maximum number of arguments that the function `fname` + can accept, excluding those in `args`. Used for displaying + appropriate error messages. Must be non-negative. - max_length: int, optional - If not `None`, the maximum number of arguments that should be contained - in the `args` parameter. This number must be at least as large as the - provided `min_length` value. The default is None. - - msg: str, optional - Error message to display when a custom check of args fails. For - example, pandas does not support a non-None argument for `out` - when rounding a `Series` or `DataFrame` object. `msg` in this - case can be "Inplace rounding is not supported". + compat_args: OrderedDict + An ordered dictionary of keys and their associated default values. + In order to accommodate buggy behaviour in some versions of `numpy`, + where a signature displayed keyword arguments but then passed those + arguments **positionally** internally when calling downstream + implementations, an ordered dictionary ensures that the original + order of the keyword arguments is enforced. Note that if there is + only one key, a generic dict can be passed in as well.
Raises ------ - ValueError if `args` fails to have a length that is at least `min_length` - and at most `max_length` inclusive (provided `max_length` is not None) + TypeError if `args` contains more values than there are `compat_args` + ValueError if `args` contains values that do not correspond to those + of the default values specified in `compat_args` """ - length = len(args) - - if min_length < 0: - raise ValueError("'min_length' must be non-negative") + _check_arg_length(fname, args, max_fname_arg_count, compat_args) - if max_length is None: - if length < min_length: - raise ValueError(("expected at least {min_length} arguments " - "but got {length} arguments instead". - format(min_length=min_length, length=length))) + # We do this so that we can provide a more informative + # error message about the parameters that we are not + # supporting in the pandas implementation of 'fname' + kwargs = dict(zip(compat_args, args)) + _check_for_default_values(fname, kwargs, compat_args) - if min_length > max_length: - raise ValueError("'min_length' > 'max_length'") - if (length < min_length) or (length > max_length): - raise ValueError(("expected between {min_length} and {max_length} " - "arguments inclusive but got {length} arguments " - "instead".format(min_length=min_length, - length=length, - max_length=max_length))) +def _check_for_invalid_keys(fname, kwargs, compat_args): + """ + Checks whether 'kwargs' contains any keys that are not + in 'compat_args' and raises a TypeError if there is one. - # See gh-12600; this is to allow compatibility with NumPy, - # which passes in an 'out' parameter as a positional argument - if args: - args = list(filter(lambda elt: elt is not None, args)) + """ + # set(dict) --> set of the dictionary's keys + diff = set(kwargs) - set(compat_args) - if args: - raise ValueError(msg) + if diff: + bad_arg = list(diff)[0] + raise TypeError(("{fname}() got an unexpected " + "keyword argument '{arg}'". 
+ format(fname=fname, arg=bad_arg))) -def validate_kwargs(fname, kwargs, *compat_args): +def validate_kwargs(fname, kwargs, compat_args): """ Checks whether parameters passed to the **kwargs argument in a - function 'fname' are valid parameters as specified in *compat_args + function `fname` are valid parameters as specified in `compat_args` + and whether or not they are set to their default values. Parameters ---------- @@ -80,18 +131,78 @@ def validate_kwargs(fname, kwargs, *compat_args): kwargs: dict The `**kwargs` parameter passed into `fname` - compat_args: *args - A tuple of keys that `kwargs` is allowed to have + compat_args: dict + A dictionary of keys that `kwargs` is allowed to have and their + associated default values Raises ------ - ValueError if `kwargs` contains keys not in `compat_args` + TypeError if `kwargs` contains keys not in `compat_args` + ValueError if `kwargs` contains keys in `compat_args` that do not + map to the default values specified in `compat_args` """ - list(map(kwargs.__delitem__, filter( - kwargs.__contains__, compat_args))) - if kwargs: - bad_arg = list(kwargs)[0] # first 'key' element - raise TypeError(("{fname}() got an unexpected " - "keyword argument '{arg}'". - format(fname=fname, arg=bad_arg))) + kwds = kwargs.copy() + _check_for_invalid_keys(fname, kwargs, compat_args) + _check_for_default_values(fname, kwds, compat_args) + + +def validate_args_and_kwargs(fname, args, kwargs, + max_fname_arg_count, + compat_args): + """ + Checks whether parameters passed to the *args and **kwargs argument in a + function `fname` are valid parameters as specified in `compat_args` + and whether or not they are set to their default values.
+ + Parameters + ---------- + fname: str + The name of the function being passed the `**kwargs` parameter + + args: tuple + The `*args` parameter passed into a function + + kwargs: dict + The `**kwargs` parameter passed into `fname` + + max_fname_arg_count: int + The maximum number of arguments that the function `fname` + can accept, excluding those in `args`. Used for displaying + appropriate error messages. Must be non-negative. + + compat_args: OrderedDict + An ordered dictionary of keys that `kwargs` is allowed to + have and their associated default values. Note that if there + is only one key, a generic dict can be passed in as well. + + Raises + ------ + TypeError if `args` contains more values than there are + `compat_args` OR `kwargs` contains keys not in `compat_args` + ValueError if `args` contains values not at the default value OR + `kwargs` contains keys in `compat_args` that do not map to the default + value as specified in `compat_args` + + See Also + -------- + validate_args : purely args validation + validate_kwargs : purely kwargs validation + + """ + # Check that the total number of arguments passed in (i.e.
+ # args and kwargs) does not exceed the length of compat_args + _check_arg_length(fname, args + tuple(kwargs.values()), + max_fname_arg_count, compat_args) + + # Check there is no overlap with the positional and keyword + # arguments, similar to what is done in actual Python functions + args_dict = dict(zip(compat_args, args)) + + for key in args_dict: + if key in kwargs: + raise TypeError("{fname}() got multiple values for keyword " + "argument '{arg}'".format(fname=fname, arg=key)) + + kwargs.update(args_dict) + validate_kwargs(fname, kwargs, compat_args) diff --git a/setup.py b/setup.py index 5e969de53ef5b..596fe62ff0781 100755 --- a/setup.py +++ b/setup.py @@ -557,6 +557,7 @@ def pxd(name): version=versioneer.get_version(), packages=['pandas', 'pandas.compat', + 'pandas.compat.numpy', 'pandas.computation', 'pandas.computation.tests', 'pandas.core',