Skip to content

Fine-grained errstate handling #13145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
13 changes: 13 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ This is a major release from 0.18.1 and includes a small number of API changes,
enhancements, and performance improvements along with a large number of bug fixes. We recommend that all
users upgrade to this version.

.. warning::

pandas >= 0.19.0 will no longer silence numpy ufunc warnings upon import, see :ref:`here <whatsnew_0190.errstate>`. (:issue:`13109`, :issue:`13145`)

Highlights include:

- :func:`merge_asof` for asof-style time-series joining, see :ref:`here <whatsnew_0190.enhancements.asof_merge>`
Expand Down Expand Up @@ -357,6 +361,15 @@ Google BigQuery Enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- The :func:`pandas.io.gbq.read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs <io.bigquery_reader>` for more details (:issue:`13615`).

.. _whatsnew_0190.errstate:

Fine-grained numpy errstate
^^^^^^^^^^^^^^^^^^^^^^^^^^^

Previous versions of pandas would permanently silence numpy's ufunc error handling when ``pandas`` was imported (:issue:`13109`). Pandas did this in order to silence the warnings that would arise from using numpy ufuncs on missing data, which are usually represented as NaNs. Unfortunately, this also silenced legitimate warnings arising in non-pandas code in the application. Starting with 0.19.0, pandas will use the ``numpy.errstate`` context manager to silence these warnings in a more fine-grained manner, only around the places in the pandas codebase where these operations are actually performed.

After upgrading pandas, you may see *new* ``RuntimeWarnings`` being issued from your code. These are likely legitimate, and the underlying cause likely existed in the code when using previous versions of pandas, which simply silenced the warning. Use `numpy.errstate <https://numpy.org/doc/stable/reference/generated/numpy.errstate.html>`__ around the source of the ``RuntimeWarning`` to control how these conditions are handled.

.. _whatsnew_0190.enhancements.other:

Other enhancements
Expand Down
2 changes: 0 additions & 2 deletions pandas/compat/numpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from distutils.version import LooseVersion
from pandas.compat import string_types, string_and_binary_types

# turn off all numpy warnings
np.seterr(all='ignore')

# numpy versioning
_np_version = np.version.short_version
Expand Down
2 changes: 1 addition & 1 deletion pandas/computation/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _align_core(terms):
term_axis_size = len(ti.axes[axis])
reindexer_size = len(reindexer)

ordm = np.log10(abs(reindexer_size - term_axis_size))
ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the fix for #13135 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

if ordm >= 1 and reindexer_size >= 10000:
warnings.warn('Alignment difference on axis {0} is larger '
'than an order of magnitude on term {1!r}, '
Expand Down
3 changes: 2 additions & 1 deletion pandas/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs):
""" standard evaluation """
if _TEST_MODE:
_store_test_result(False)
return op(a, b)
with np.errstate(all='ignore'):
return op(a, b)


def _can_use_numexpr(op, op_str, a, b, dtype_check):
Expand Down
3 changes: 2 additions & 1 deletion pandas/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,8 @@ def __init__(self, func, args):

def __call__(self, env):
operands = [op(env) for op in self.operands]
return self.func.func(*operands)
with np.errstate(all='ignore'):
return self.func.func(*operands)

def __unicode__(self):
operands = map(str, self.operands)
Expand Down
6 changes: 4 additions & 2 deletions pandas/computation/tests/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1613,7 +1613,8 @@ def test_unary_functions(self):
for fn in self.unary_fns:
expr = "{0}(a)".format(fn)
got = self.eval(expr)
expect = getattr(np, fn)(a)
with np.errstate(all='ignore'):
expect = getattr(np, fn)(a)
tm.assert_series_equal(got, expect, check_names=False)

def test_binary_functions(self):
Expand All @@ -1624,7 +1625,8 @@ def test_binary_functions(self):
for fn in self.binary_fns:
expr = "{0}(a, b)".format(fn)
got = self.eval(expr)
expect = getattr(np, fn)(a, b)
with np.errstate(all='ignore'):
expect = getattr(np, fn)(a, b)
tm.assert_almost_equal(got, expect, check_names=False)

def test_df_use_case(self):
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3813,7 +3813,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
this = self[col].values
that = other[col].values
if filter_func is not None:
mask = ~filter_func(this) | isnull(that)
with np.errstate(all='ignore'):
mask = ~filter_func(this) | isnull(that)
else:
if raise_conflict:
mask_this = notnull(that)
Expand Down Expand Up @@ -4108,7 +4109,8 @@ def f(x):
return self._apply_empty_result(func, axis, reduce, *args, **kwds)

if isinstance(f, np.ufunc):
results = f(self.values)
with np.errstate(all='ignore'):
results = f(self.values)
return self._constructor(data=results, index=self.index,
columns=self.columns, copy=False)
else:
Expand Down Expand Up @@ -4934,7 +4936,8 @@ def f(x):
"type %s not implemented." %
filter_type)
raise_with_traceback(e)
result = f(data.values)
with np.errstate(all='ignore'):
result = f(data.values)
labels = data._get_agg_axis(axis)
else:
if numeric_only:
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,8 @@ def apply(self, func, *args, **kwargs):

@wraps(func)
def f(g):
return func(g, *args, **kwargs)
with np.errstate(all='ignore'):
return func(g, *args, **kwargs)
else:
raise ValueError('func must be a callable if args or '
'kwargs are supplied')
Expand Down Expand Up @@ -4126,7 +4127,10 @@ def loop(labels, shape):
out = stride * labels[0].astype('i8', subok=False, copy=False)

for i in range(1, nlev):
stride //= shape[i]
if shape[i] == 0:
stride = 0
else:
stride //= shape[i]
out += labels[i] * stride

if xnull: # exclude nulls
Expand Down Expand Up @@ -4365,7 +4369,9 @@ def _get_group_index_sorter(group_index, ngroups):
count = len(group_index)
alpha = 0.0 # taking complexities literally; there may be
beta = 1.0 # some room for fine-tuning these parameters
if alpha + beta * ngroups < count * np.log(count):
do_groupsort = (count > 0 and ((alpha + beta * ngroups) <
(count * np.log(count))))
if do_groupsort:
sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index),
ngroups)
return _ensure_platform_int(sorter)
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,8 @@ def apply(self, func, mgr=None, **kwargs):
""" apply the function to my values; return a block if we are not
one
"""
result = func(self.values, **kwargs)
with np.errstate(all='ignore'):
result = func(self.values, **kwargs)
if not isinstance(result, Block):
result = self.make_block(values=_block_shape(result,
ndim=self.ndim))
Expand Down Expand Up @@ -1156,7 +1157,8 @@ def handle_error():

# get the result
try:
result = get_result(other)
with np.errstate(all='ignore'):
result = get_result(other)

# if we have an invalid shape/broadcast error
# GH4576, so raise instead of allowing to pass through
Expand Down
24 changes: 15 additions & 9 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def _f(*args, **kwargs):
'this dtype'.format(
f.__name__.replace('nan', '')))
try:
return f(*args, **kwargs)
with np.errstate(invalid='ignore'):
return f(*args, **kwargs)
except ValueError as e:
# we want to transform an object array
# ValueError message to the more typical TypeError
Expand Down Expand Up @@ -513,7 +514,8 @@ def nanskew(values, axis=None, skipna=True):
m2 = _zero_out_fperr(m2)
m3 = _zero_out_fperr(m3)

result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)
with np.errstate(invalid='ignore', divide='ignore'):
result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)

dtype = values.dtype
if is_float_dtype(dtype):
Expand Down Expand Up @@ -562,10 +564,11 @@ def nankurt(values, axis=None, skipna=True):
m2 = adjusted2.sum(axis, dtype=np.float64)
m4 = adjusted4.sum(axis, dtype=np.float64)

adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
numer = count * (count + 1) * (count - 1) * m4
denom = (count - 2) * (count - 3) * m2**2
result = numer / denom - adj
with np.errstate(invalid='ignore', divide='ignore'):
adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
numer = count * (count + 1) * (count - 1) * m4
denom = (count - 2) * (count - 3) * m2**2
result = numer / denom - adj

# floating point error
numer = _zero_out_fperr(numer)
Expand All @@ -579,7 +582,8 @@ def nankurt(values, axis=None, skipna=True):
if denom == 0:
return 0

result = numer / denom - adj
with np.errstate(invalid='ignore', divide='ignore'):
result = numer / denom - adj

dtype = values.dtype
if is_float_dtype(dtype):
Expand Down Expand Up @@ -658,7 +662,8 @@ def _maybe_null_out(result, axis, mask):

def _zero_out_fperr(arg):
if isinstance(arg, np.ndarray):
return np.where(np.abs(arg) < 1e-14, 0, arg)
with np.errstate(invalid='ignore'):
return np.where(np.abs(arg) < 1e-14, 0, arg)
else:
return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg

Expand Down Expand Up @@ -760,7 +765,8 @@ def f(x, y):
ymask = isnull(y)
mask = xmask | ymask

result = op(x, y)
with np.errstate(all='ignore'):
result = op(x, y)

if mask.any():
if is_bool_dtype(result):
Expand Down
18 changes: 12 additions & 6 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,8 @@ def na_op(x, y):

def safe_na_op(lvalues, rvalues):
try:
return na_op(lvalues, rvalues)
with np.errstate(all='ignore'):
return na_op(lvalues, rvalues)
except Exception:
if isinstance(rvalues, ABCSeries):
if is_object_dtype(rvalues):
Expand Down Expand Up @@ -743,7 +744,8 @@ def na_op(x, y):
x = x.view('i8')

try:
result = getattr(x, name)(y)
with np.errstate(all='ignore'):
result = getattr(x, name)(y)
if result is NotImplemented:
raise TypeError("invalid type comparison")
except AttributeError:
Expand Down Expand Up @@ -796,13 +798,15 @@ def wrapper(self, other, axis=None):
# which would then not take categories ordering into account
# we can go directly to op, as the na_op would just test again and
# dispatch to it.
res = op(self.values, other)
with np.errstate(all='ignore'):
res = op(self.values, other)
else:
values = self.get_values()
if isinstance(other, (list, np.ndarray)):
other = np.asarray(other)

res = na_op(values, other)
with np.errstate(all='ignore'):
res = na_op(values, other)
if isscalar(res):
raise TypeError('Could not compare %s type with Series' %
type(other))
Expand Down Expand Up @@ -1096,13 +1100,15 @@ def na_op(x, y):
xrav = xrav[mask]
yrav = yrav[mask]
if np.prod(xrav.shape) and np.prod(yrav.shape):
result[mask] = op(xrav, yrav)
with np.errstate(all='ignore'):
result[mask] = op(xrav, yrav)
elif hasattr(x, 'size'):
result = np.empty(x.size, dtype=x.dtype)
mask = notnull(xrav)
xrav = xrav[mask]
if np.prod(xrav.shape):
result[mask] = op(xrav, y)
with np.errstate(all='ignore'):
result[mask] = op(xrav, y)
else:
raise TypeError("cannot perform operation {op} between "
"objects of type {x} and {y}".format(
Expand Down
29 changes: 17 additions & 12 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,8 @@ def _combine(self, other, func, axis=0):
(str(type(other)), str(type(self))))

def _combine_const(self, other, func):
new_values = func(self.values, other)
with np.errstate(all='ignore'):
new_values = func(self.values, other)
d = self._construct_axes_dict()
return self._constructor(new_values, **d)

Expand All @@ -723,14 +724,15 @@ def _combine_frame(self, other, func, axis=0):

other = other.reindex(index=index, columns=columns)

if axis == 0:
new_values = func(self.values, other.values)
elif axis == 1:
new_values = func(self.values.swapaxes(0, 1), other.values.T)
new_values = new_values.swapaxes(0, 1)
elif axis == 2:
new_values = func(self.values.swapaxes(0, 2), other.values)
new_values = new_values.swapaxes(0, 2)
with np.errstate(all='ignore'):
if axis == 0:
new_values = func(self.values, other.values)
elif axis == 1:
new_values = func(self.values.swapaxes(0, 1), other.values.T)
new_values = new_values.swapaxes(0, 1)
elif axis == 2:
new_values = func(self.values.swapaxes(0, 2), other.values)
new_values = new_values.swapaxes(0, 2)

return self._constructor(new_values, self.items, self.major_axis,
self.minor_axis)
Expand All @@ -744,7 +746,8 @@ def _combine_panel(self, other, func):
this = self.reindex(items=items, major=major, minor=minor)
other = other.reindex(items=items, major=major, minor=minor)

result_values = func(this.values, other.values)
with np.errstate(all='ignore'):
result_values = func(this.values, other.values)

return self._constructor(result_values, items, major, minor)

Expand Down Expand Up @@ -1011,7 +1014,8 @@ def apply(self, func, axis='major', **kwargs):
# try ufunc like
if isinstance(f, np.ufunc):
try:
result = np.apply_along_axis(func, axis, self.values)
with np.errstate(all='ignore'):
result = np.apply_along_axis(func, axis, self.values)
return self._wrap_result(result, axis=axis)
except (AttributeError):
pass
Expand Down Expand Up @@ -1113,7 +1117,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
axis_number = self._get_axis_number(axis_name)
f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds)

result = f(self.values)
with np.errstate(all='ignore'):
result = f(self.values)

axes = self._get_plane_axes(axis_name)
if result.ndim == 2 and axis_name != self._info_axis_name:
Expand Down
Loading