From 6fbc9ceaa91136c7dac379084a5cb7eae45d1366 Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Wed, 11 May 2016 16:08:30 +0100 Subject: [PATCH 01/14] BUG: First pass at fine-grained errstate. --- pandas/compat/numpy/__init__.py | 2 -- pandas/computation/align.py | 3 ++- pandas/computation/expressions.py | 3 ++- pandas/computation/ops.py | 3 ++- pandas/core/frame.py | 9 ++++++--- pandas/core/groupby.py | 12 +++++++++--- pandas/core/internals.py | 6 ++++-- pandas/core/nanops.py | 21 +++++++++++++-------- pandas/core/ops.py | 18 ++++++++++++------ pandas/core/panel.py | 29 +++++++++++++++++------------ pandas/core/series.py | 27 ++++++++++++++++----------- pandas/core/window.py | 23 +++++++++++++---------- pandas/formats/format.py | 7 ++++--- pandas/indexes/base.py | 10 +++++++--- pandas/indexes/range.py | 12 ++++++++---- pandas/sparse/array.py | 25 +++++++++++++++---------- pandas/sparse/series.py | 3 ++- 17 files changed, 132 insertions(+), 81 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 8ecc5dc979792..f2d837a4c9908 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -5,8 +5,6 @@ from distutils.version import LooseVersion from pandas.compat import string_types, string_and_binary_types -# turn off all numpy warnings -np.seterr(all='ignore') # numpy versioning _np_version = np.version.short_version diff --git a/pandas/computation/align.py b/pandas/computation/align.py index a117342fdefe2..8c70162f31880 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -95,7 +95,8 @@ def _align_core(terms): term_axis_size = len(ti.axes[axis]) reindexer_size = len(reindexer) - ordm = np.log10(abs(reindexer_size - term_axis_size)) + with np.errstate(divide='ignore'): + ordm = np.log10(abs(reindexer_size - term_axis_size)) if ordm >= 1 and reindexer_size >= 10000: warnings.warn('Alignment difference on axis {0} is larger ' 'than an order of magnitude on term {1!r}, ' diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py index 086e92dbde1a0..8fd9ab3477b74 100644 --- a/pandas/computation/expressions.py +++ b/pandas/computation/expressions.py @@ -59,7 +59,8 @@ def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): """ standard evaluation """ if _TEST_MODE: _store_test_result(False) - return op(a, b) + with np.errstate(all='ignore'): + return op(a, b) def _can_use_numexpr(op, op_str, a, b, dtype_check): diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 96a04cff9372e..9446e84d891c4 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -523,7 +523,8 @@ def __init__(self, func, args): def __call__(self, env): operands = [op(env) for op in self.operands] - return self.func.func(*operands) + with np.errstate(all='ignore'): + return self.func.func(*operands) def __unicode__(self): operands = map(str, self.operands) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5db755b0d3dac..0c403acd3b586 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3813,7 +3813,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, this = self[col].values that = other[col].values if filter_func is not None: - mask = ~filter_func(this) | isnull(that) + with np.errstate(all='ignore'): + mask = ~filter_func(this) | isnull(that) else: if raise_conflict: mask_this = notnull(that) @@ -4108,7 +4109,8 @@ def f(x): return self._apply_empty_result(func, axis, reduce, *args, **kwds) if isinstance(f, 
np.ufunc): - results = f(self.values) + with np.errstate(all='ignore'): + results = f(self.values) return self._constructor(data=results, index=self.index, columns=self.columns, copy=False) else: @@ -4934,7 +4936,8 @@ def f(x): "type %s not implemented." % filter_type) raise_with_traceback(e) - result = f(data.values) + with np.errstate(all='ignore'): + result = f(data.values) labels = data._get_agg_axis(axis) else: if numeric_only: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 5c3c5bbfab9a8..83266a99cde8a 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -678,7 +678,8 @@ def apply(self, func, *args, **kwargs): @wraps(func) def f(g): - return func(g, *args, **kwargs) + with np.errstate(all='ignore'): + return func(g, *args, **kwargs) else: raise ValueError('func must be a callable if args or ' 'kwargs are supplied') @@ -4126,7 +4127,10 @@ def loop(labels, shape): out = stride * labels[0].astype('i8', subok=False, copy=False) for i in range(1, nlev): - stride //= shape[i] + if shape[i] == 0: + stride = 0 + else: + stride //= shape[i] out += labels[i] * stride if xnull: # exclude nulls @@ -4365,7 +4369,9 @@ def _get_group_index_sorter(group_index, ngroups): count = len(group_index) alpha = 0.0 # taking complexities literally; there may be beta = 1.0 # some room for fine-tuning these parameters - if alpha + beta * ngroups < count * np.log(count): + with np.errstate(divide='ignore', invalid='ignore'): + do_groupsort = alpha + beta * ngroups < count * np.log(count) + if do_groupsort: sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), ngroups) return _ensure_platform_int(sorter) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e9b45e444d8d8..e11fd4086347f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -348,7 +348,8 @@ def apply(self, func, mgr=None, **kwargs): """ apply the function to my values; return a block if we are not one """ - result = func(self.values, **kwargs) + with np.errstate(all='ignore'): + result = func(self.values, **kwargs) if not isinstance(result, Block): result = self.make_block(values=_block_shape(result, ndim=self.ndim)) @@ -1156,7 +1157,8 @@ def handle_error(): # get the result try: - result = get_result(other) + with np.errstate(all='ignore'): + result = get_result(other) # if we have an invalid shape/broadcast error # GH4576, so raise instead of allowing to pass through diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 2199daf549824..a53bada5cf744 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -45,7 +45,8 @@ def _f(*args, **kwargs): 'this dtype'.format( f.__name__.replace('nan', ''))) try: - return f(*args, **kwargs) + with np.errstate(invalid='ignore'): + return f(*args, **kwargs) except ValueError as e: # we want to transform an object array # ValueError message to the more typical TypeError @@ -513,7 +514,8 @@ def nanskew(values, axis=None, skipna=True): m2 = _zero_out_fperr(m2) m3 = _zero_out_fperr(m3) - result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5) + with np.errstate(invalid='ignore', divide='ignore'): + result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5) dtype = values.dtype if is_float_dtype(dtype): @@ -562,10 +564,11 @@ def nankurt(values, axis=None, skipna=True): m2 = adjusted2.sum(axis, dtype=np.float64) m4 = adjusted4.sum(axis, dtype=np.float64) - adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) - numer = count * (count + 1) * (count - 1) * m4 - denom = (count - 2) * (count - 
3) * m2**2 - result = numer / denom - adj + with np.errstate(invalid='ignore', divide='ignore'): + adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) + numer = count * (count + 1) * (count - 1) * m4 + denom = (count - 2) * (count - 3) * m2**2 + result = numer / denom - adj # floating point error numer = _zero_out_fperr(numer) @@ -658,7 +661,8 @@ def _maybe_null_out(result, axis, mask): def _zero_out_fperr(arg): if isinstance(arg, np.ndarray): - return np.where(np.abs(arg) < 1e-14, 0, arg) + with np.errstate(invalid='ignore'): + return np.where(np.abs(arg) < 1e-14, 0, arg) else: return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg @@ -760,7 +764,8 @@ def f(x, y): ymask = isnull(y) mask = xmask | ymask - result = op(x, y) + with np.errstate(all='ignore'): + result = op(x, y) if mask.any(): if is_bool_dtype(result): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 66d9391d2facf..8d49e41284a7b 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -636,7 +636,8 @@ def na_op(x, y): def safe_na_op(lvalues, rvalues): try: - return na_op(lvalues, rvalues) + with np.errstate(all='ignore'): + return na_op(lvalues, rvalues) except Exception: if isinstance(rvalues, ABCSeries): if is_object_dtype(rvalues): @@ -743,7 +744,8 @@ def na_op(x, y): x = x.view('i8') try: - result = getattr(x, name)(y) + with np.errstate(all='ignore'): + result = getattr(x, name)(y) if result is NotImplemented: raise TypeError("invalid type comparison") except AttributeError: @@ -796,13 +798,15 @@ def wrapper(self, other, axis=None): # which would then not take categories ordering into account # we can go directly to op, as the na_op would just test again and # dispatch to it. - res = op(self.values, other) + with np.errstate(all='ignore'): + res = op(self.values, other) else: values = self.get_values() if isinstance(other, (list, np.ndarray)): other = np.asarray(other) - res = na_op(values, other) + with np.errstate(all='ignore'): + res = na_op(values, other) if isscalar(res): raise TypeError('Could not compare %s type with Series' % type(other)) @@ -1096,13 +1100,15 @@ def na_op(x, y): xrav = xrav[mask] yrav = yrav[mask] if np.prod(xrav.shape) and np.prod(yrav.shape): - result[mask] = op(xrav, yrav) + with np.errstate(all='ignore'): + result[mask] = op(xrav, yrav) elif hasattr(x, 'size'): result = np.empty(x.size, dtype=x.dtype) mask = notnull(xrav) xrav = xrav[mask] if np.prod(xrav.shape): - result[mask] = op(xrav, y) + with np.errstate(all='ignore'): + result[mask] = op(xrav, y) else: raise TypeError("cannot perform operation {op} between " "objects of type {x} and {y}".format( diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b2082ce29545e..b2f318d825db6 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -713,7 +713,8 @@ def _combine(self, other, func, axis=0): (str(type(other)), str(type(self)))) def _combine_const(self, other, func): - new_values = func(self.values, other) + with np.errstate(all='ignore'): + new_values = func(self.values, other) d = self._construct_axes_dict() return self._constructor(new_values, **d) @@ -723,14 +724,15 @@ def _combine_frame(self, other, func, axis=0): other = other.reindex(index=index, columns=columns) - if axis == 0: - new_values = func(self.values, other.values) - elif axis == 1: - new_values = func(self.values.swapaxes(0, 1), other.values.T) - new_values = new_values.swapaxes(0, 1) - elif axis == 2: - new_values = func(self.values.swapaxes(0, 2), other.values) - new_values = new_values.swapaxes(0, 2) + with 
np.errstate(all='ignore'): + if axis == 0: + new_values = func(self.values, other.values) + elif axis == 1: + new_values = func(self.values.swapaxes(0, 1), other.values.T) + new_values = new_values.swapaxes(0, 1) + elif axis == 2: + new_values = func(self.values.swapaxes(0, 2), other.values) + new_values = new_values.swapaxes(0, 2) return self._constructor(new_values, self.items, self.major_axis, self.minor_axis) @@ -744,7 +746,8 @@ def _combine_panel(self, other, func): this = self.reindex(items=items, major=major, minor=minor) other = other.reindex(items=items, major=major, minor=minor) - result_values = func(this.values, other.values) + with np.errstate(all='ignore'): + result_values = func(this.values, other.values) return self._constructor(result_values, items, major, minor) @@ -1011,7 +1014,8 @@ def apply(self, func, axis='major', **kwargs): # try ufunc like if isinstance(f, np.ufunc): try: - result = np.apply_along_axis(func, axis, self.values) + with np.errstate(all='ignore'): + result = np.apply_along_axis(func, axis, self.values) return self._wrap_result(result, axis=axis) except (AttributeError): pass @@ -1113,7 +1117,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, axis_number = self._get_axis_number(axis_name) f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds) - result = f(self.values) + with np.errstate(all='ignore'): + result = f(self.values) axes = self._get_plane_axes(axis_name) if result.ndim == 2 and axis_name != self._info_axis_name: diff --git a/pandas/core/series.py b/pandas/core/series.py index e388683012a66..32edcf6e698a3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1626,7 +1626,8 @@ def _binop(self, other, func, level=None, fill_value=None): this_vals[this_mask & mask] = fill_value other_vals[other_mask & mask] = fill_value - result = func(this_vals, other_vals) + with np.errstate(all='ignore'): + result = func(this_vals, other_vals) name = _maybe_match_name(self, other) result = self._constructor(result, index=new_index, name=name) result = result.__finalize__(self) @@ -1658,10 +1659,12 @@ def combine(self, other, func, fill_value=nan): for i, idx in enumerate(new_index): lv = self.get(idx, fill_value) rv = other.get(idx, fill_value) - new_values[i] = func(lv, rv) + with np.errstate(all='ignore'): + new_values[i] = func(lv, rv) else: new_index = self.index - new_values = func(self._values, other) + with np.errstate(all='ignore'): + new_values = func(self._values, other) new_name = self.name return self._constructor(new_values, index=new_index, name=new_name) @@ -2240,14 +2243,15 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): else: f = func - if isinstance(f, np.ufunc): - return f(self) + with np.errstate(all='ignore'): + if isinstance(f, np.ufunc): + return f(self) - if is_extension_type(self.dtype): - mapped = self._values.map(f) - else: - values = self.asobject - mapped = lib.map_infer(values, f, convert=convert_dtype) + if is_extension_type(self.dtype): + mapped = self._values.map(f) + else: + values = self.asobject + mapped = lib.map_infer(values, f, convert=convert_dtype) if len(mapped) and isinstance(mapped[0], Series): from pandas.core.frame import DataFrame @@ -2272,7 +2276,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, if numeric_only: raise NotImplementedError('Series.{0} does not implement ' 'numeric_only.'.format(name)) - return op(delegate, skipna=skipna, **kwds) + with np.errstate(all='ignore'): + return op(delegate, skipna=skipna, **kwds) return 
delegate._reduce(op=op, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only, diff --git a/pandas/core/window.py b/pandas/core/window.py index 9e2a27adc25a7..b7276aed506de 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -733,10 +733,11 @@ def calc(x): def calc(x): return func(x, window, min_periods=self.min_periods) - if values.ndim > 1: - result = np.apply_along_axis(calc, self.axis, values) - else: - result = calc(values) + with np.errstate(all='ignore'): + if values.ndim > 1: + result = np.apply_along_axis(calc, self.axis, values) + else: + result = calc(values) if center: result = self._center_window(result, window) @@ -1617,10 +1618,11 @@ def _cov(x, y): x_values = X._prep_values() y_values = Y._prep_values() - cov = _cov(x_values, y_values) - x_var = _cov(x_values, x_values) - y_var = _cov(y_values, y_values) - corr = cov / _zsqrt(x_var * y_var) + with np.errstate(all='ignore'): + cov = _cov(x_values, y_values) + x_var = _cov(x_values, x_values) + y_var = _cov(y_values, y_values) + corr = cov / _zsqrt(x_var * y_var) return X._wrap_result(corr) return _flex_binary_moment(self._selected_obj, other._selected_obj, @@ -1757,8 +1759,9 @@ def _use_window(minp, window): def _zsqrt(x): - result = np.sqrt(x) - mask = x < 0 + with np.errstate(all='ignore'): + result = np.sqrt(x) + mask = x < 0 from pandas import DataFrame if isinstance(x, DataFrame): diff --git a/pandas/formats/format.py b/pandas/formats/format.py index f89ceaff2ad64..0613d59bedd8e 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -2099,9 +2099,10 @@ def format_values_with(float_format): # this is pretty arbitrary for now # large values: more that 8 characters including decimal symbol # and first digit, hence > 1e6 - has_large_values = (abs_vals > 1e6).any() - has_small_values = ((abs_vals < 10**(-self.digits)) & - (abs_vals > 0)).any() + with np.errstate(invalid='ignore'): + has_large_values = (abs_vals > 1e6).any() + has_small_values = ((abs_vals < 10**(-self.digits)) & + (abs_vals > 0)).any() if has_small_values or (too_long and has_large_values): float_format = '%% .%de' % self.digits diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 9b378715b8a96..68d4cf846b699 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -3303,9 +3303,11 @@ def _evaluate_compare(self, other): if is_object_dtype(self) and self.nlevels == 1: # don't pass MultiIndex - result = _comp_method_OBJECT_ARRAY(op, self.values, other) + with np.errstate(all='ignore'): + result = _comp_method_OBJECT_ARRAY(op, self.values, other) else: - result = op(self.values, np.asarray(other)) + with np.errstate(all='ignore'): + result = op(self.values, np.asarray(other)) # technically we could support bool dtyped Index # for now just return the indexing array directly @@ -3450,7 +3452,9 @@ def _evaluate_numeric_binop(self, other): attrs = self._get_attributes_dict() attrs = self._maybe_update_attributes(attrs) - return Index(op(values, other), **attrs) + with np.errstate(all='ignore'): + result = op(values, other) + return Index(result, **attrs) return _evaluate_numeric_binop diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 465ec4904f7ee..76166e7155bd0 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -576,7 +576,8 @@ def _evaluate_numeric_binop(self, other): try: # alppy if we have an override if step: - rstep = step(self._step, other) + with np.errstate(all='ignore'): + rstep = step(self._step, other) # we don't have a representable op # so return a 
base index @@ -586,8 +587,9 @@ def _evaluate_numeric_binop(self, other): else: rstep = self._step - rstart = op(self._start, other) - rstop = op(self._stop, other) + with np.errstate(all='ignore'): + rstart = op(self._start, other) + rstop = op(self._stop, other) result = RangeIndex(rstart, rstop, @@ -612,7 +614,9 @@ def _evaluate_numeric_binop(self, other): if isinstance(other, RangeIndex): other = other.values - return Index(op(self, other), **attrs) + with np.errstate(all='ignore'): + results = op(self, other) + return Index(results, **attrs) return _evaluate_numeric_binop diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 8d564d0abbf3f..0b3128acc3c9d 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -55,9 +55,11 @@ def wrapper(self, other): dtype=dtype) return _sparse_array_op(self, other, op, name) elif is_scalar(other): - fill = op(_get_fill(self), np.asarray(other)) - return _wrap_result(name, op(self.sp_values, other), - self.sp_index, fill) + with np.errstate(all='ignore'): + fill = op(_get_fill(self), np.asarray(other)) + result = op(self.sp_values, other) + + return _wrap_result(name, result, self.sp_index, fill) else: # pragma: no cover raise TypeError('operation with %s not supported' % type(other)) @@ -101,17 +103,19 @@ def _sparse_array_op(left, right, op, name, series=False): result_dtype = None if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: - result = op(left.get_values(), right.get_values()) + with np.seterr(all='ignore'): + result = op(left.get_values(), right.get_values()) + fill = op(_get_fill(left), _get_fill(right)) if left.sp_index.ngaps == 0: index = left.sp_index else: index = right.sp_index - fill = op(_get_fill(left), _get_fill(right)) elif left.sp_index.equals(right.sp_index): - result = op(left.sp_values, right.sp_values) + with np.seterr(all='ignore'): + result = op(left.sp_values, right.sp_values) + fill = op(_get_fill(left), _get_fill(right)) index = left.sp_index - fill = op(_get_fill(left), _get_fill(right)) else: if name[0] == 'r': left, right = right, left @@ -129,9 +133,10 @@ def _sparse_array_op(left, right, op, name, series=False): right_sp_values = right.sp_values sparse_op = getattr(splib, opname) - result, index, fill = sparse_op(left_sp_values, left.sp_index, - left.fill_value, right_sp_values, - right.sp_index, right.fill_value) + with np.seterr(all='ignore'): + result, index, fill = sparse_op(left_sp_values, left.sp_index, + left.fill_value, right_sp_values, + right.sp_index, right.fill_value) if result_dtype is None: result_dtype = result.dtype diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 4ad77b4deab4f..99b058097af02 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -57,7 +57,8 @@ def wrapper(self, other): elif isinstance(other, DataFrame): return NotImplemented elif is_scalar(other): - new_values = op(self.values, other) + with np.errstate(all='ignore'): + new_values = op(self.values, other) return self._constructor(new_values, index=self.index, name=self.name) From eca512cdf44517ca5856c92eee9cfdf8eb4b87fd Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Wed, 11 May 2016 21:04:35 +0100 Subject: [PATCH 02/14] BUG: Handle NaT explicitly. 
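
NaT is stored internally as the most negative 64-bit integer, so blindly adding a
timezone offset to it produces an ordinary value that downstream code no longer
recognises as NaT. The guards added to _localize_tso below skip the addition when
obj.value is NPY_NAT. A rough sketch of the failure mode in plain Python/numpy,
with the int64 minimum standing in for the NPY_NAT constant:

    import numpy as np

    iNaT = np.iinfo(np.int64).min    # sentinel that represents NaT internally
    delta = 3600 * 10**9             # a one-hour UTC offset, in nanoseconds

    shifted = iNaT + delta           # no longer the sentinel...
    assert shifted != iNaT           # ...so it would be treated as a real timestamp
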
--- pandas/tslib.pyx | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 53c77b2d8f9d7..c1b990c417553 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1516,8 +1516,12 @@ cdef inline void _localize_tso(_TSObject obj, object tz): dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, tz) delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 - pandas_datetime_to_datetimestruct(obj.value + delta, - PANDAS_FR_ns, &obj.dts) + if obj.value != NPY_NAT: + pandas_datetime_to_datetimestruct(obj.value + delta, + PANDAS_FR_ns, &obj.dts) + else: + pandas_datetime_to_datetimestruct(obj.value, + PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz else: # Adjust datetime64 timestamp, recompute datetimestruct @@ -1529,7 +1533,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): # static/pytz/dateutil specific code if _is_fixed_offset(tz): # statictzinfo - if len(deltas) > 0: + if len(deltas) > 0 and obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + deltas[0], PANDAS_FR_ns, &obj.dts) else: @@ -1537,12 +1541,20 @@ cdef inline void _localize_tso(_TSObject obj, object tz): obj.tzinfo = tz elif _treat_tz_as_pytz(tz): inf = tz._transition_info[pos] - pandas_datetime_to_datetimestruct(obj.value + deltas[pos], - PANDAS_FR_ns, &obj.dts) + if obj.value != NPY_NAT: + pandas_datetime_to_datetimestruct(obj.value + deltas[pos], + PANDAS_FR_ns, &obj.dts) + else: + pandas_datetime_to_datetimestruct(obj.value, + PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz._tzinfos[inf] elif _treat_tz_as_dateutil(tz): - pandas_datetime_to_datetimestruct(obj.value + deltas[pos], - PANDAS_FR_ns, &obj.dts) + if obj.value != NPY_NAT: + pandas_datetime_to_datetimestruct(obj.value + deltas[pos], + PANDAS_FR_ns, &obj.dts) + else: + pandas_datetime_to_datetimestruct(obj.value, + PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz else: obj.tzinfo = tz From 3b12f08afe33f1d4398027868028c94bda13eff3 Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Wed, 11 May 2016 21:05:38 +0100 Subject: [PATCH 03/14] ENH: Silence numpy warnings from certain expressions computed during tests. 
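
The recurring pattern in these test changes is to evaluate expressions that
legitimately produce NaN or inf inside a local np.errstate block, so the expected
values can be computed without emitting RuntimeWarnings during the run. A minimal
version of the idiom (not taken from any particular test):

    import numpy as np

    arr = np.array([1.0, 0.0, -1.0])
    with np.errstate(divide='ignore', invalid='ignore'):
        expected = arr / 0    # array([inf, nan, -inf]) with no RuntimeWarnings

np.errstate restores the previous settings on exit, so the rest of the suite still
sees numpy's default behaviour.
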
--- pandas/computation/tests/test_eval.py | 6 +- pandas/sparse/tests/test_array.py | 20 +++-- pandas/tests/formats/test_format.py | 2 +- pandas/tests/frame/test_apply.py | 30 ++++--- pandas/tests/frame/test_misc_api.py | 3 +- pandas/tests/frame/test_operators.py | 17 ++-- pandas/tests/indexes/common.py | 5 +- pandas/tests/series/test_analytics.py | 55 ++++++------ pandas/tests/series/test_apply.py | 23 ++--- pandas/tests/series/test_operators.py | 119 +++++++++++++------------- pandas/tests/test_nanops.py | 14 +-- pandas/tests/test_panel.py | 18 ++-- pandas/tests/test_panel4d.py | 13 +-- pandas/tseries/tests/test_base.py | 1 + 14 files changed, 176 insertions(+), 150 deletions(-) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 066df0521fef6..c50944f0a4d3b 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1613,7 +1613,8 @@ def test_unary_functions(self): for fn in self.unary_fns: expr = "{0}(a)".format(fn) got = self.eval(expr) - expect = getattr(np, fn)(a) + with np.errstate(all='ignore'): + expect = getattr(np, fn)(a) tm.assert_series_equal(got, expect, check_names=False) def test_binary_functions(self): @@ -1624,7 +1625,8 @@ def test_binary_functions(self): for fn in self.binary_fns: expr = "{0}(a, b)".format(fn) got = self.eval(expr) - expect = getattr(np, fn)(a, b) + with np.errstate(all='ignore'): + expect = getattr(np, fn)(a, b) tm.assert_almost_equal(got, expect, check_names=False) def test_df_use_case(self): diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 70cda5acc3f4c..53fce0e1292c1 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -516,15 +516,17 @@ def _check_inplace_op(op): tmp = arr1.copy() self.assertRaises(NotImplementedError, op, tmp, arr2) - bin_ops = [operator.add, operator.sub, operator.mul, operator.truediv, - operator.floordiv, operator.pow] - for op in bin_ops: - _check_op(op, arr1, arr2) - _check_op(op, farr1, farr2) - - inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', 'ipow'] - for op in inplace_ops: - _check_inplace_op(getattr(operator, op)) + with np.errstate(all='ignore'): + bin_ops = [operator.add, operator.sub, operator.mul, + operator.truediv, operator.floordiv, operator.pow] + for op in bin_ops: + _check_op(op, arr1, arr2) + _check_op(op, farr1, farr2) + + inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', + 'ipow'] + for op in inplace_ops: + _check_inplace_op(getattr(operator, op)) def test_pickle(self): def _check_roundtrip(obj): diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 8a4aca2b320aa..e6147737e9a1d 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1668,7 +1668,7 @@ def test_string_repr_encoding(self): def test_repr_corner(self): # representing infs poses no problems - df = DataFrame({'foo': np.inf * np.empty(10)}) + df = DataFrame({'foo': [-np.inf, np.inf]}) repr(df) def test_frame_info_encoding(self): diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 020b7f1f1ab9d..5cadb4dba577f 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -22,18 +22,19 @@ class TestDataFrameApply(tm.TestCase, TestData): _multiprocess_can_split_ = True def test_apply(self): - # ufunc - applied = self.frame.apply(np.sqrt) - assert_series_equal(np.sqrt(self.frame['A']), applied['A']) + with 
np.errstate(all='ignore'): + # ufunc + applied = self.frame.apply(np.sqrt) + assert_series_equal(np.sqrt(self.frame['A']), applied['A']) - # aggregator - applied = self.frame.apply(np.mean) - self.assertEqual(applied['A'], np.mean(self.frame['A'])) + # aggregator + applied = self.frame.apply(np.mean) + self.assertEqual(applied['A'], np.mean(self.frame['A'])) - d = self.frame.index[0] - applied = self.frame.apply(np.mean, axis=1) - self.assertEqual(applied[d], np.mean(self.frame.xs(d))) - self.assertIs(applied.index, self.frame.index) # want this + d = self.frame.index[0] + applied = self.frame.apply(np.mean, axis=1) + self.assertEqual(applied[d], np.mean(self.frame.xs(d))) + self.assertIs(applied.index, self.frame.index) # want this # invalid axis df = DataFrame( @@ -187,10 +188,11 @@ def _checkit(axis=0, raw=False): _checkit(raw=True) _checkit(axis=0, raw=True) - _check(no_cols, lambda x: x) - _check(no_cols, lambda x: x.mean()) - _check(no_index, lambda x: x) - _check(no_index, lambda x: x.mean()) + with np.errstate(all='ignore'): + _check(no_cols, lambda x: x) + _check(no_cols, lambda x: x.mean()) + _check(no_index, lambda x: x) + _check(no_index, lambda x: x.mean()) result = no_cols.apply(lambda x: x.mean(), broadcast=True) tm.assertIsInstance(result, DataFrame) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 03b3c0a5e65d0..089b71b30119b 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -207,7 +207,8 @@ def test_new_empty_index(self): self.assertIsNone(df2.index.name) def test_array_interface(self): - result = np.sqrt(self.frame) + with np.errstate(all='ignore'): + result = np.sqrt(self.frame) tm.assertIsInstance(result, type(self.frame)) self.assertIs(result.index, self.frame.index) self.assertIs(result.columns, self.frame.columns) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 5f3eb84f72127..a9ca4f626c42e 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -217,7 +217,9 @@ def test_modulo(self): assert_frame_equal(result, expected) # numpy has a slightly different (wrong) treatement - result2 = DataFrame(p.values % p.values, index=p.index, + with np.errstate(all='ignore'): + arr = p.values % p.values + result2 = DataFrame(arr, index=p.index, columns=p.columns, dtype='float64') result2.iloc[0:3, 1] = np.nan assert_frame_equal(result2, expected) @@ -227,8 +229,9 @@ def test_modulo(self): assert_frame_equal(result, expected) # numpy has a slightly different (wrong) treatement - result2 = DataFrame(p.values.astype('float64') % - 0, index=p.index, columns=p.columns) + with np.errstate(all='ignore'): + arr = p.values.astype('float64') % 0 + result2 = DataFrame(arr, index=p.index, columns=p.columns) assert_frame_equal(result2, expected) # not commutative with series @@ -248,7 +251,9 @@ def test_div(self): 'second': Series([nan, nan, nan, 1])}) assert_frame_equal(result, expected) - result2 = DataFrame(p.values.astype('float') / p.values, index=p.index, + with np.errstate(all='ignore'): + arr = p.values.astype('float') / p.values + result2 = DataFrame(arr, index=p.index, columns=p.columns) assert_frame_equal(result2, expected) @@ -258,7 +263,9 @@ def test_div(self): assert_frame_equal(result, expected) # numpy has a slightly different (wrong) treatement - result2 = DataFrame(p.values.astype('float64') / 0, index=p.index, + with np.errstate(all='ignore'): + arr = p.values.astype('float64') / 0 + result2 = 
DataFrame(arr, index=p.index, columns=p.columns) assert_frame_equal(result2, expected) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 26f90a814ab29..ed3584f1728e6 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -712,8 +712,9 @@ def test_numpy_ufuncs(self): func(idx) elif isinstance(idx, (Float64Index, Int64Index)): # coerces to float (e.g. np.sin) - result = func(idx) - exp = Index(func(idx.values), name=idx.name) + with np.errstate(all='ignore'): + result = func(idx) + exp = Index(func(idx.values), name=idx.name) self.assert_index_equal(result, exp) self.assertIsInstance(result, pd.Float64Index) else: diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 6575c106f006f..24e3a0ff5f325 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -622,39 +622,40 @@ def test_all_any_params(self): self.assertRaises(NotImplementedError, s.all, bool_only=True) def test_modulo(self): + with np.errstate(all='ignore'): + + # GH3590, modulo as ints + p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + result = p['first'] % p['second'] + expected = Series(p['first'].values % p['second'].values, + dtype='float64') + expected.iloc[0:3] = np.nan + assert_series_equal(result, expected) - # GH3590, modulo as ints - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - result = p['first'] % p['second'] - expected = Series(p['first'].values % p['second'].values, - dtype='float64') - expected.iloc[0:3] = np.nan - assert_series_equal(result, expected) - - result = p['first'] % 0 - expected = Series(np.nan, index=p.index, name='first') - assert_series_equal(result, expected) + result = p['first'] % 0 + expected = Series(np.nan, index=p.index, name='first') + assert_series_equal(result, expected) - p = p.astype('float64') - result = p['first'] % p['second'] - expected = Series(p['first'].values % p['second'].values) - assert_series_equal(result, expected) + p = p.astype('float64') + result = p['first'] % p['second'] + expected = Series(p['first'].values % p['second'].values) + assert_series_equal(result, expected) - p = p.astype('float64') - result = p['first'] % p['second'] - result2 = p['second'] % p['first'] - self.assertFalse(np.array_equal(result, result2)) + p = p.astype('float64') + result = p['first'] % p['second'] + result2 = p['second'] % p['first'] + self.assertFalse(np.array_equal(result, result2)) - # GH 9144 - s = Series([0, 1]) + # GH 9144 + s = Series([0, 1]) - result = s % 0 - expected = Series([nan, nan]) - assert_series_equal(result, expected) + result = s % 0 + expected = Series([nan, nan]) + assert_series_equal(result, expected) - result = 0 % s - expected = Series([nan, 0.0]) - assert_series_equal(result, expected) + result = 0 % s + expected = Series([nan, 0.0]) + assert_series_equal(result, expected) def test_ops_consistency_on_empty(self): diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 26fc80c3ef988..8d7676bef4d72 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -18,17 +18,18 @@ class TestSeriesApply(TestData, tm.TestCase): _multiprocess_can_split_ = True def test_apply(self): - assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) - - # elementwise-apply - import math - assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) - - # how to handle Series result, #2316 - result = self.ts.apply(lambda x: Series( - [x, x ** 
2], index=['x', 'x^2'])) - expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) - tm.assert_frame_equal(result, expected) + with np.errstate(all='ignore'): + assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) + + # elementwise-apply + import math + assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) + + # how to handle Series result, #2316 + result = self.ts.apply(lambda x: Series( + [x, x ** 2], index=['x', 'x^2'])) + expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) + tm.assert_frame_equal(result, expected) # empty series s = Series(dtype=object, name='foo', index=pd.Index([], name='bar')) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 5ebe528ff8cab..5fc44fe1dc608 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -34,7 +34,8 @@ def test_comparisons(self): left[:3] = np.nan result = nanops.nangt(left, right) - expected = (left > right).astype('O') + with np.errstate(invalid='ignore'): + expected = (left > right).astype('O') expected[:3] = np.nan assert_almost_equal(result, expected) @@ -81,62 +82,63 @@ def test_invert(self): assert_series_equal(-(self.series < 0), ~(self.series < 0)) def test_div(self): + with np.errstate(all='ignore'): + # no longer do integer div for any ops, but deal with the 0's + p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + result = p['first'] / p['second'] + expected = Series( + p['first'].values.astype(float) / p['second'].values, + dtype='float64') + expected.iloc[0:3] = np.inf + assert_series_equal(result, expected) - # no longer do integer div for any ops, but deal with the 0's - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - result = p['first'] / p['second'] - expected = Series(p['first'].values.astype(float) / p['second'].values, - dtype='float64') - expected.iloc[0:3] = np.inf - assert_series_equal(result, expected) - - result = p['first'] / 0 - expected = Series(np.inf, index=p.index, name='first') - assert_series_equal(result, expected) + result = p['first'] / 0 + expected = Series(np.inf, index=p.index, name='first') + assert_series_equal(result, expected) - p = p.astype('float64') - result = p['first'] / p['second'] - expected = Series(p['first'].values / p['second'].values) - assert_series_equal(result, expected) + p = p.astype('float64') + result = p['first'] / p['second'] + expected = Series(p['first'].values / p['second'].values) + assert_series_equal(result, expected) - p = DataFrame({'first': [3, 4, 5, 8], 'second': [1, 1, 1, 1]}) - result = p['first'] / p['second'] - assert_series_equal(result, p['first'].astype('float64'), - check_names=False) - self.assertTrue(result.name is None) - self.assertFalse(np.array_equal(result, p['second'] / p['first'])) - - # inf signing - s = Series([np.nan, 1., -1.]) - result = s / 0 - expected = Series([np.nan, np.inf, -np.inf]) - assert_series_equal(result, expected) + p = DataFrame({'first': [3, 4, 5, 8], 'second': [1, 1, 1, 1]}) + result = p['first'] / p['second'] + assert_series_equal(result, p['first'].astype('float64'), + check_names=False) + self.assertTrue(result.name is None) + self.assertFalse(np.array_equal(result, p['second'] / p['first'])) + + # inf signing + s = Series([np.nan, 1., -1.]) + result = s / 0 + expected = Series([np.nan, np.inf, -np.inf]) + assert_series_equal(result, expected) - # float/integer issue - # GH 7785 - p = DataFrame({'first': (1, 0), 'second': (-0.01, -0.02)}) - expected = Series([-0.01, -np.inf]) + # 
float/integer issue + # GH 7785 + p = DataFrame({'first': (1, 0), 'second': (-0.01, -0.02)}) + expected = Series([-0.01, -np.inf]) - result = p['second'].div(p['first']) - assert_series_equal(result, expected, check_names=False) + result = p['second'].div(p['first']) + assert_series_equal(result, expected, check_names=False) - result = p['second'] / p['first'] - assert_series_equal(result, expected) + result = p['second'] / p['first'] + assert_series_equal(result, expected) - # GH 9144 - s = Series([-1, 0, 1]) + # GH 9144 + s = Series([-1, 0, 1]) - result = 0 / s - expected = Series([0.0, nan, 0.0]) - assert_series_equal(result, expected) + result = 0 / s + expected = Series([0.0, nan, 0.0]) + assert_series_equal(result, expected) - result = s / 0 - expected = Series([-inf, nan, inf]) - assert_series_equal(result, expected) + result = s / 0 + expected = Series([-inf, nan, inf]) + assert_series_equal(result, expected) - result = s // 0 - expected = Series([-inf, nan, inf]) - assert_series_equal(result, expected) + result = s // 0 + expected = Series([-inf, nan, inf]) + assert_series_equal(result, expected) def test_operators(self): def _check_op(series, other, op, pos_only=False, @@ -1432,18 +1434,19 @@ def _check_fill(meth, op, a, b, fill_value=0): exp_values = [] for i in range(len(exp_index)): - if amask[i]: - if bmask[i]: - exp_values.append(nan) - continue - exp_values.append(op(fill_value, b[i])) - elif bmask[i]: + with np.errstate(all='ignore'): if amask[i]: - exp_values.append(nan) - continue - exp_values.append(op(a[i], fill_value)) - else: - exp_values.append(op(a[i], b[i])) + if bmask[i]: + exp_values.append(nan) + continue + exp_values.append(op(fill_value, b[i])) + elif bmask[i]: + if amask[i]: + exp_values.append(nan) + continue + exp_values.append(op(a[i], fill_value)) + else: + exp_values.append(op(a[i], b[i])) result = meth(a, b, fill_value=fill_value) expected = Series(exp_values, exp_index) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index eeeddc278c714..dd3a49de55d73 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -58,12 +58,14 @@ def setUp(self): 'O'), self.arr_utf.astype('O'), self.arr_date.astype('O'), self.arr_tdelta.astype('O')]) - self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j - self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj]) - - self.arr_nan_infj = self.arr_inf * 1j - self.arr_complex_nan_infj = np.vstack([self.arr_complex, - self.arr_nan_infj]) + with np.errstate(invalid='ignore'): + self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j + self.arr_complex_nan = np.vstack([self.arr_complex, + self.arr_nan_nanj]) + + self.arr_nan_infj = self.arr_inf * 1j + self.arr_complex_nan_infj = np.vstack([self.arr_complex, + self.arr_nan_infj]) self.arr_float_2d = self.arr_float[:, :, 0] self.arr_float1_2d = self.arr_float1[:, :, 0] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1f9ca4635b585..10a6693525590 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -824,12 +824,13 @@ def test_comp(func): self.assert_numpy_array_equal(result3.values, func(self.panel.values, 0)) - test_comp(operator.eq) - test_comp(operator.ne) - test_comp(operator.lt) - test_comp(operator.gt) - test_comp(operator.ge) - test_comp(operator.le) + with np.errstate(invalid='ignore'): + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) def test_get_value(self): for item in 
self.panel.items: @@ -1186,8 +1187,9 @@ def test_apply(self): # ufunc applied = self.panel.apply(np.sqrt) - self.assertTrue(assert_almost_equal(applied.values, np.sqrt( - self.panel.values))) + with np.errstate(invalid='ignore'): + expected = np.sqrt(self.panel.values) + assert_almost_equal(applied.values, expected) # ufunc same shape result = self.panel.apply(lambda x: x * 2, axis='items') diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 50ede3f2c2367..493889e579af2 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -461,12 +461,13 @@ def test_comp(func): self.assert_numpy_array_equal(result3.values, func(self.panel4d.values, 0)) - test_comp(operator.eq) - test_comp(operator.ne) - test_comp(operator.lt) - test_comp(operator.gt) - test_comp(operator.ge) - test_comp(operator.le) + with np.errstate(invalid='ignore'): + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) def test_major_xs(self): ref = self.panel4d['l1']['ItemA'] diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 0d6c991f00c8b..2d7d407a2bb7b 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -347,6 +347,7 @@ def test_summary(self): for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]): result = idx.summary() + print((expected, result)) self.assertEqual(result, expected) def test_resolution(self): From c9df7b3c2d0031ec6baf5ffd352d0abe20fa6858 Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Wed, 11 May 2016 22:29:02 +0100 Subject: [PATCH 04/14] BUG: removed debugging print --- pandas/tseries/tests/test_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 2d7d407a2bb7b..0d6c991f00c8b 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -347,7 +347,6 @@ def test_summary(self): for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]): result = idx.summary() - print((expected, result)) self.assertEqual(result, expected) def test_resolution(self): From 69328517981d212e0c18b05b1044d973b4e87bc9 Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Wed, 11 May 2016 22:44:02 +0100 Subject: [PATCH 05/14] TST: Basic check that the global errstate remains unchanged. --- pandas/tests/test_util.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index d6baa720bac19..9193880df7feb 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -326,6 +326,16 @@ def test_exactly_one_ref(self): self.assertEqual(bytearray(as_stolen_buf), b'test') +def test_numpy_errstate_is_default(): + # The defaults since numpy 1.6.0 + expected = {'over': 'warn', 'divide': 'warn', 'invalid': 'warn', + 'under': 'ignore'} + import numpy as np + from pandas.compat import numpy # noqa + # The errstate should be unchanged after that import. + tm.assert_equal(np.geterr(), expected) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 863ac93804083803df5ac61c465f35c65d573770 Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Thu, 12 May 2016 11:02:38 +0100 Subject: [PATCH 06/14] TST: Add a new test to ensure that boolean comparisons are errstate-protected. 
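
The new test recomputes a DataFrame comparison under np.errstate(invalid='raise'),
where any numpy warning leaking out of the pandas code would surface as a
FloatingPointError. With the numpy releases current at the time, ordering
comparisons against NaN set the 'invalid' flag, so the raw situation being guarded
against looks roughly like:

    import numpy as np

    values = np.array([np.nan, 1.0])
    with np.errstate(invalid='raise'):
        try:
            _ = values < 0        # bare ufunc comparison involving NaN
        except FloatingPointError:
            pass                  # what the errstate-wrapped DataFrame op must avoid
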
--- pandas/tests/frame/test_operators.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index a9ca4f626c42e..85aadee8b0900 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -929,6 +929,15 @@ def test_comp(func): test_comp(operator.ge) test_comp(operator.le) + def test_comparison_protected_from_errstate(self): + missing_df = tm.makeDataFrame() + missing_df.iloc[0]['A'] = np.nan + with np.errstate(invalid='ignore'): + expected = missing_df.values < 0 + with np.errstate(invalid='raise'): + result = (missing_df < 0).values + self.assert_numpy_array_equal(result, expected) + def test_string_comparison(self): df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) mask_a = df.a > 1 From 0e1ea8107c82ae614dd606e3f2d56e62fb9da9c6 Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Thu, 12 May 2016 12:10:24 +0100 Subject: [PATCH 07/14] BUG: A few more stragglers. --- pandas/core/nanops.py | 3 ++- pandas/tests/indexes/common.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index a53bada5cf744..a76e348b7dee2 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -582,7 +582,8 @@ def nankurt(values, axis=None, skipna=True): if denom == 0: return 0 - result = numer / denom - adj + with np.errstate(invalid='ignore', divide='ignore'): + result = numer / denom - adj dtype = values.dtype if is_float_dtype(dtype): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ed3584f1728e6..59b98ebcff82a 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -709,7 +709,8 @@ def test_numpy_ufuncs(self): # raise TypeError or ValueError (PeriodIndex) # PeriodIndex behavior should be changed in future version with tm.assertRaises(Exception): - func(idx) + with np.errstate(all='ignore'): + func(idx) elif isinstance(idx, (Float64Index, Int64Index)): # coerces to float (e.g. np.sin) with np.errstate(all='ignore'): @@ -723,7 +724,8 @@ def test_numpy_ufuncs(self): continue else: with tm.assertRaises(Exception): - func(idx) + with np.errstate(all='ignore'): + func(idx) for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): From a59cfa75016ad2742a3e7b87949422d0c13efee9 Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Thu, 12 May 2016 14:48:33 +0100 Subject: [PATCH 08/14] ENH: Avoiding the bounds error is better than silencing the warning. --- pandas/computation/align.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/computation/align.py b/pandas/computation/align.py index 8c70162f31880..4e12d58a4ab85 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -95,8 +95,7 @@ def _align_core(terms): term_axis_size = len(ti.axes[axis]) reindexer_size = len(reindexer) - with np.errstate(divide='ignore'): - ordm = np.log10(abs(reindexer_size - term_axis_size)) + ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) if ordm >= 1 and reindexer_size >= 10000: warnings.warn('Alignment difference on axis {0} is larger ' 'than an order of magnitude on term {1!r}, ' From e7adc03613844f78e5541aa35af4051fab4465eb Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Wed, 10 Aug 2016 22:49:34 -0500 Subject: [PATCH 09/14] BUG: wrong function. 
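
np.seterr and np.errstate are easy to conflate: seterr changes the process-wide
settings as a side effect and returns the old settings as a plain dict, so
"with np.seterr(...)" both leaks the change and then fails because the dict is not
a context manager. errstate is the scoped variant. Roughly:

    import numpy as np

    old = np.seterr(all='ignore')    # global side effect; returns previous settings
    np.seterr(**old)                 # caller must restore them by hand

    with np.errstate(all='ignore'):  # scoped: settings restored on exit
        np.float64(1.0) / np.float64(0.0)

This commit and PATCH 14/14 switch the three "with np.seterr(all='ignore')" blocks
that PATCH 01/14 introduced in pandas/sparse/array.py over to np.errstate.
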
--- pandas/sparse/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 0b3128acc3c9d..f906813b730a4 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -103,7 +103,7 @@ def _sparse_array_op(left, right, op, name, series=False): result_dtype = None if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: - with np.seterr(all='ignore'): + with np.errstate(all='ignore'): result = op(left.get_values(), right.get_values()) fill = op(_get_fill(left), _get_fill(right)) @@ -112,7 +112,7 @@ def _sparse_array_op(left, right, op, name, series=False): else: index = right.sp_index elif left.sp_index.equals(right.sp_index): - with np.seterr(all='ignore'): + with np.errstate(all='ignore'): result = op(left.sp_values, right.sp_values) fill = op(_get_fill(left), _get_fill(right)) index = left.sp_index From bf1f66235a1b3839a539bcb07af4e41647659f8d Mon Sep 17 00:00:00 2001 From: Robert Kern Date: Thu, 11 Aug 2016 12:15:40 -0500 Subject: [PATCH 10/14] BUG: New fixes after master rebase. --- pandas/formats/format.py | 16 +-- pandas/sparse/array.py | 3 +- pandas/sparse/series.py | 3 +- pandas/sparse/tests/test_arithmetics.py | 132 +++++++++++++----------- pandas/tests/test_groupby.py | 4 +- 5 files changed, 85 insertions(+), 73 deletions(-) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 0613d59bedd8e..b83e3c4e73fdb 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -2094,12 +2094,11 @@ def format_values_with(float_format): else: too_long = False - abs_vals = np.abs(self.values) - - # this is pretty arbitrary for now - # large values: more that 8 characters including decimal symbol - # and first digit, hence > 1e6 with np.errstate(invalid='ignore'): + abs_vals = np.abs(self.values) + # this is pretty arbitrary for now + # large values: more that 8 characters including decimal symbol + # and first digit, hence > 1e6 has_large_values = (abs_vals > 1e6).any() has_small_values = ((abs_vals < 10**(-self.digits)) & (abs_vals > 0)).any() @@ -2212,9 +2211,10 @@ def format_percentiles(percentiles): percentiles = np.asarray(percentiles) # It checks for np.NaN as well - if not is_numeric_dtype(percentiles) or not np.all(percentiles >= 0) \ - or not np.all(percentiles <= 1): - raise ValueError("percentiles should all be in the interval [0,1]") + with np.errstate(invalid='ignore'): + if not is_numeric_dtype(percentiles) or not np.all(percentiles >= 0) \ + or not np.all(percentiles <= 1): + raise ValueError("percentiles should all be in the interval [0,1]") percentiles = 100 * percentiles int_idx = (percentiles.astype(int) == percentiles) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index f906813b730a4..db4b5a4d8324b 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -293,7 +293,8 @@ def __array_wrap__(self, out_arr, context=None): ufunc, args, domain = context # to apply ufunc only to fill_value (to avoid recursive call) args = [getattr(a, 'fill_value', a) for a in args] - fill_value = ufunc(self.fill_value, *args[1:]) + with np.errstate(all='ignore'): + fill_value = ufunc(self.fill_value, *args[1:]) else: fill_value = self.fill_value diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 99b058097af02..01f3c2d00ce24 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -311,7 +311,8 @@ def __array_wrap__(self, result, context=None): if isinstance(context, tuple) and len(context) == 3: ufunc, args, domain = context args = 
[getattr(a, 'fill_value', a) for a in args]
-            fill_value = ufunc(self.fill_value, *args[1:])
+            with np.errstate(all='ignore'):
+                fill_value = ufunc(self.fill_value, *args[1:])
         else:
             fill_value = self.fill_value
diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/sparse/tests/test_arithmetics.py
index ec8bc4d8634e6..5a95d0651498e 100644
--- a/pandas/sparse/tests/test_arithmetics.py
+++ b/pandas/sparse/tests/test_arithmetics.py
@@ -14,55 +14,59 @@ def _assert(self, a, b):
         tm.assert_numpy_array_equal(a, b)
 
     def _check_numeric_ops(self, a, b, a_dense, b_dense):
-        # sparse & sparse
-        self._assert((a + b).to_dense(), a_dense + b_dense)
-        self._assert((b + a).to_dense(), b_dense + a_dense)
+        with np.errstate(invalid='ignore', divide='ignore'):
+            # Unfortunately, trying to wrap the computation of each expected
+            # value with np.errstate() is too tedious.
 
-        self._assert((a - b).to_dense(), a_dense - b_dense)
-        self._assert((b - a).to_dense(), b_dense - a_dense)
+            # sparse & sparse
+            self._assert((a + b).to_dense(), a_dense + b_dense)
+            self._assert((b + a).to_dense(), b_dense + a_dense)
 
-        self._assert((a * b).to_dense(), a_dense * b_dense)
-        self._assert((b * a).to_dense(), b_dense * a_dense)
+            self._assert((a - b).to_dense(), a_dense - b_dense)
+            self._assert((b - a).to_dense(), b_dense - a_dense)
 
-        # pandas uses future division
-        self._assert((a / b).to_dense(), a_dense * 1.0 / b_dense)
-        self._assert((b / a).to_dense(), b_dense * 1.0 / a_dense)
+            self._assert((a * b).to_dense(), a_dense * b_dense)
+            self._assert((b * a).to_dense(), b_dense * a_dense)
 
-        # ToDo: FIXME in GH 13843
-        if not (self._base == pd.Series and a.dtype == 'int64'):
-            self._assert((a // b).to_dense(), a_dense // b_dense)
-            self._assert((b // a).to_dense(), b_dense // a_dense)
+            # pandas uses future division
+            self._assert((a / b).to_dense(), a_dense * 1.0 / b_dense)
+            self._assert((b / a).to_dense(), b_dense * 1.0 / a_dense)
 
-        self._assert((a % b).to_dense(), a_dense % b_dense)
-        self._assert((b % a).to_dense(), b_dense % a_dense)
+            # ToDo: FIXME in GH 13843
+            if not (self._base == pd.Series and a.dtype == 'int64'):
+                self._assert((a // b).to_dense(), a_dense // b_dense)
+                self._assert((b // a).to_dense(), b_dense // a_dense)
 
-        self._assert((a ** b).to_dense(), a_dense ** b_dense)
-        self._assert((b ** a).to_dense(), b_dense ** a_dense)
+            self._assert((a % b).to_dense(), a_dense % b_dense)
+            self._assert((b % a).to_dense(), b_dense % a_dense)
 
-        # sparse & dense
-        self._assert((a + b_dense).to_dense(), a_dense + b_dense)
-        self._assert((b_dense + a).to_dense(), b_dense + a_dense)
+            self._assert((a ** b).to_dense(), a_dense ** b_dense)
+            self._assert((b ** a).to_dense(), b_dense ** a_dense)
+
+            # sparse & dense
+            self._assert((a + b_dense).to_dense(), a_dense + b_dense)
+            self._assert((b_dense + a).to_dense(), b_dense + a_dense)
 
-        self._assert((a - b_dense).to_dense(), a_dense - b_dense)
-        self._assert((b_dense - a).to_dense(), b_dense - a_dense)
+            self._assert((a - b_dense).to_dense(), a_dense - b_dense)
+            self._assert((b_dense - a).to_dense(), b_dense - a_dense)
 
-        self._assert((a * b_dense).to_dense(), a_dense * b_dense)
-        self._assert((b_dense * a).to_dense(), b_dense * a_dense)
+            self._assert((a * b_dense).to_dense(), a_dense * b_dense)
+            self._assert((b_dense * a).to_dense(), b_dense * a_dense)
 
-        # pandas uses future division
-        self._assert((a / b_dense).to_dense(), a_dense * 1.0 / b_dense)
-        self._assert((b_dense / a).to_dense(), b_dense * 1.0 / a_dense)
+            # pandas uses future division
+            self._assert((a / b_dense).to_dense(), a_dense * 1.0 / b_dense)
+            self._assert((b_dense / a).to_dense(), b_dense * 1.0 / a_dense)
 
-        # ToDo: FIXME in GH 13843
-        if not (self._base == pd.Series and a.dtype == 'int64'):
-            self._assert((a // b_dense).to_dense(), a_dense // b_dense)
-            self._assert((b_dense // a).to_dense(), b_dense // a_dense)
+            # ToDo: FIXME in GH 13843
+            if not (self._base == pd.Series and a.dtype == 'int64'):
+                self._assert((a // b_dense).to_dense(), a_dense // b_dense)
+                self._assert((b_dense // a).to_dense(), b_dense // a_dense)
 
-        self._assert((a % b_dense).to_dense(), a_dense % b_dense)
-        self._assert((b_dense % a).to_dense(), b_dense % a_dense)
+            self._assert((a % b_dense).to_dense(), a_dense % b_dense)
+            self._assert((b_dense % a).to_dense(), b_dense % a_dense)
 
-        self._assert((a ** b_dense).to_dense(), a_dense ** b_dense)
-        self._assert((b_dense ** a).to_dense(), b_dense ** a_dense)
+            self._assert((a ** b_dense).to_dense(), a_dense ** b_dense)
+            self._assert((b_dense ** a).to_dense(), b_dense ** a_dense)
 
     def _check_bool_result(self, res):
         tm.assertIsInstance(res, self._klass)
@@ -70,43 +74,47 @@ def _check_bool_result(self, res):
         self.assertIsInstance(res.fill_value, bool)
 
     def _check_comparison_ops(self, a, b, a_dense, b_dense):
-        # sparse & sparse
-        self._check_bool_result(a == b)
-        self._assert((a == b).to_dense(), a_dense == b_dense)
+        with np.errstate(invalid='ignore'):
+            # Unfortunately, trying to wrap the computation of each expected
+            # value with np.errstate() is too tedious.
+            #
+            # sparse & sparse
+            self._check_bool_result(a == b)
+            self._assert((a == b).to_dense(), a_dense == b_dense)
 
-        self._check_bool_result(a != b)
-        self._assert((a != b).to_dense(), a_dense != b_dense)
+            self._check_bool_result(a != b)
+            self._assert((a != b).to_dense(), a_dense != b_dense)
 
-        self._check_bool_result(a >= b)
-        self._assert((a >= b).to_dense(), a_dense >= b_dense)
+            self._check_bool_result(a >= b)
+            self._assert((a >= b).to_dense(), a_dense >= b_dense)
 
-        self._check_bool_result(a <= b)
-        self._assert((a <= b).to_dense(), a_dense <= b_dense)
+            self._check_bool_result(a <= b)
+            self._assert((a <= b).to_dense(), a_dense <= b_dense)
 
-        self._check_bool_result(a > b)
-        self._assert((a > b).to_dense(), a_dense > b_dense)
+            self._check_bool_result(a > b)
+            self._assert((a > b).to_dense(), a_dense > b_dense)
 
-        self._check_bool_result(a < b)
-        self._assert((a < b).to_dense(), a_dense < b_dense)
+            self._check_bool_result(a < b)
+            self._assert((a < b).to_dense(), a_dense < b_dense)
 
-        # sparse & dense
-        self._check_bool_result(a == b_dense)
-        self._assert((a == b_dense).to_dense(), a_dense == b_dense)
+            # sparse & dense
+            self._check_bool_result(a == b_dense)
+            self._assert((a == b_dense).to_dense(), a_dense == b_dense)
 
-        self._check_bool_result(a != b_dense)
-        self._assert((a != b_dense).to_dense(), a_dense != b_dense)
+            self._check_bool_result(a != b_dense)
+            self._assert((a != b_dense).to_dense(), a_dense != b_dense)
 
-        self._check_bool_result(a >= b_dense)
-        self._assert((a >= b_dense).to_dense(), a_dense >= b_dense)
+            self._check_bool_result(a >= b_dense)
+            self._assert((a >= b_dense).to_dense(), a_dense >= b_dense)
 
-        self._check_bool_result(a <= b_dense)
-        self._assert((a <= b_dense).to_dense(), a_dense <= b_dense)
+            self._check_bool_result(a <= b_dense)
+            self._assert((a <= b_dense).to_dense(), a_dense <= b_dense)
 
-        self._check_bool_result(a > b_dense)
-        self._assert((a > b_dense).to_dense(), a_dense > b_dense)
+            self._check_bool_result(a > b_dense)
+            self._assert((a > b_dense).to_dense(), a_dense > b_dense)
 
-        self._check_bool_result(a < b_dense)
-        self._assert((a < b_dense).to_dense(), a_dense < b_dense)
+            self._check_bool_result(a < b_dense)
+            self._assert((a < b_dense).to_dense(), a_dense < b_dense)
 
     def _check_logical_ops(self, a, b, a_dense, b_dense):
         # sparse & sparse
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 6bf1a397c8482..9a82332621933 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -2595,9 +2595,11 @@ def test_cython_fail_agg(self):
 
     def test_apply_series_to_frame(self):
         def f(piece):
+            with np.errstate(invalid='ignore'):
+                logged = np.log(piece)
             return DataFrame({'value': piece,
                               'demeaned': piece - piece.mean(),
-                              'logged': np.log(piece)})
+                              'logged': logged})
 
         dr = bdate_range('1/1/2000', periods=100)
         ts = Series(np.random.randn(100), index=dr)

From 1fe1bc2e7e73bd727aba32afc424af8425885196 Mon Sep 17 00:00:00 2001
From: Robert Kern
Date: Thu, 11 Aug 2016 14:35:46 -0500
Subject: [PATCH 11/14] pep8

---
 pandas/indexes/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index 68d4cf846b699..e4e5a4e4cfec7 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -3304,7 +3304,8 @@ def _evaluate_compare(self, other):
                 if is_object_dtype(self) and self.nlevels == 1:
                     # don't pass MultiIndex
                     with np.errstate(all='ignore'):
-                        result = _comp_method_OBJECT_ARRAY(op, self.values, other)
+                        result = _comp_method_OBJECT_ARRAY(
+                            op, self.values, other)
                 else:
                     with np.errstate(all='ignore'):
                         result = op(self.values, np.asarray(other))

From 44805db689d82bdd1f0e3badb3e4a806793f5c49 Mon Sep 17 00:00:00 2001
From: Robert Kern
Date: Fri, 19 Aug 2016 18:10:10 +0100
Subject: [PATCH 12/14] ENH: Rearrange expression to avoid generating a warning that would need to be silenced.

---
 pandas/core/groupby.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 83266a99cde8a..9436257b88941 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -4369,8 +4369,8 @@ def _get_group_index_sorter(group_index, ngroups):
     count = len(group_index)
     alpha = 0.0  # taking complexities literally; there may be
     beta = 1.0  # some room for fine-tuning these parameters
-    with np.errstate(divide='ignore', invalid='ignore'):
-        do_groupsort = alpha + beta * ngroups < count * np.log(count)
+    do_groupsort = (count > 0 and ((alpha + beta * ngroups) <
+                                   (count * np.log(count))))
     if do_groupsort:
         sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index),
                                              ngroups)

From 7fd2e86f480974656f5330de370324c6da852407 Mon Sep 17 00:00:00 2001
From: Robert Kern
Date: Fri, 19 Aug 2016 19:02:05 +0100
Subject: [PATCH 13/14] ENH: More whatsnew documentation.

---
 doc/source/whatsnew/v0.19.0.txt | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index cc3cc631b9575..cf60a7fed3ac2 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -7,6 +7,10 @@ This is a major release from 0.18.1 and includes a small number of API changes,
 enhancements, and performance improvements along with a large number of bug fixes. We
 recommend that all users upgrade to this version.
 
+.. warning::
+
+   pandas >= 0.19.0 will no longer silence numpy ufunc warnings upon import, see :ref:`here `. (:issue:`13109`, :issue:`13145`)
+
 Highlights include:
 
 - :func:`merge_asof` for asof-style time-series joining, see :ref:`here `
@@ -357,6 +361,15 @@ Google BigQuery Enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 - The :func:`pandas.io.gbq.read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs ` for more details (:issue:`13615`).
+.. _whatsnew_0190.errstate:
+
+Fine-grained numpy errstate
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previous versions of pandas would permanently silence numpy's ufunc error handling when ``pandas`` was imported (:issue:`13109`). Pandas did this in order to silence the warnings that would arise from using numpy ufuncs on missing data, which are usually represented as NaNs. Unfortunately, this silenced legitimate warnings arising in non-pandas code elsewhere in the application. Starting with 0.19.0, pandas will use the ``numpy.errstate`` context manager to silence these warnings in a more fine-grained manner, only around the places where these operations are actually used in the pandas codebase.
+
+After upgrading pandas, you may see "new" ``RuntimeWarnings`` being issued from your code. These are likely legitimate, and the underlying cause likely existed in the code when using previous versions of pandas that simply silenced the warning. Use `numpy.errstate `__ around the source of the ``RuntimeWarning`` to control how these conditions are handled.
+
 .. _whatsnew_0190.enhancements.other:
 
 Other enhancements

From ef9c00130c5353b72a19e4357b7926ab4a6a0290 Mon Sep 17 00:00:00 2001
From: Robert Kern
Date: Sat, 20 Aug 2016 19:40:43 +0100
Subject: [PATCH 14/14] BUG: whoops, wrong function.

---
 pandas/sparse/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
index db4b5a4d8324b..93470e5602edb 100644
--- a/pandas/sparse/array.py
+++ b/pandas/sparse/array.py
@@ -133,7 +133,7 @@ def _sparse_array_op(left, right, op, name, series=False):
         right_sp_values = right.sp_values
 
     sparse_op = getattr(splib, opname)
-    with np.seterr(all='ignore'):
+    with np.errstate(all='ignore'):
         result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                         left.fill_value, right_sp_values,
                                         right.sp_index, right.fill_value)
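
As a minimal, self-contained sketch of the upgrade guidance in the whatsnew entry above (illustrative only, not part of the patch series; the example Series and its values are made up):

    # Illustrative sketch only: how downstream code can scope its own error
    # handling now that pandas no longer calls np.seterr(all='ignore') at
    # import time.
    import numpy as np
    import pandas as pd

    s = pd.Series([2.0, 0.0, -1.0])

    # np.log(0) emits a 'divide' RuntimeWarning and np.log(-1) an 'invalid'
    # one; with pandas >= 0.19.0 these are no longer hidden globally, so
    # silence them only around the operation known to produce them.
    with np.errstate(divide='ignore', invalid='ignore'):
        logged = np.log(s.values)  # array([ 0.69314718, -inf, nan])

This mirrors what the patches do inside pandas itself: the np.errstate context is entered around the specific ufunc call rather than being set process-wide at import.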