Skip to content

Commit 34660a2

Browse files
authored
Better error message on metadata inference failure (dask#1598)
Previously we'd just throw an error stating that metadata inference failed. This made debugging the cause of these failures tricky. Now we raise an error indicating that metadata inference failed, and include the original error and traceback as well.
1 parent b7fd210 commit 34660a2

File tree

4 files changed

+71
-54
lines changed

4 files changed

+71
-54
lines changed

dask/dataframe/core.py

Lines changed: 21 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131
from . import methods
3232
from .indexing import (_partition_of_index_value, _loc, _try_loc,
3333
_coerce_loc_index, _maybe_partial_time_string)
34-
from .utils import meta_nonempty, make_meta, insert_meta_param_description
34+
from .utils import (meta_nonempty, make_meta, insert_meta_param_description,
35+
raise_on_meta_error)
3536

3637
no_default = '__no_default__'
3738

@@ -157,11 +158,7 @@ def _get_unary_operator(cls, op):
157158
def f(self):
158159
name = funcname(op) + '-' + tokenize(self)
159160
dsk = {(name, 0): (op, (self._name, 0))}
160-
try:
161-
meta = op(self._meta_nonempty)
162-
except:
163-
raise ValueError("Metadata inference failed in operator "
164-
"{0}.".format(funcname(op)))
161+
meta = op(self._meta_nonempty)
165162
return Scalar(merge(dsk, self.dask), name, meta)
166163
return f
167164

@@ -189,16 +186,12 @@ def _scalar_binary(op, self, other, inv=False):
189186
else:
190187
dsk.update({(name, 0): (op, (self._name, 0), other_key)})
191188

192-
try:
193-
other_meta = make_meta(other)
194-
other_meta_nonempty = meta_nonempty(other_meta)
195-
if inv:
196-
meta = op(other_meta_nonempty, self._meta_nonempty)
197-
else:
198-
meta = op(self._meta_nonempty, other_meta_nonempty)
199-
except:
200-
raise ValueError("Metadata inference failed in operator "
201-
"{0}.".format(funcname(op)))
189+
other_meta = make_meta(other)
190+
other_meta_nonempty = meta_nonempty(other_meta)
191+
if inv:
192+
meta = op(other_meta_nonempty, self._meta_nonempty)
193+
else:
194+
meta = op(self._meta_nonempty, other_meta_nonempty)
202195

203196
if return_type is not Scalar:
204197
return return_type(dsk, name, meta,
@@ -1631,11 +1624,7 @@ def map(self, arg, na_action=None, meta=no_default):
16311624
enumerate(self._keys()))
16321625
dsk.update(self.dask)
16331626
if meta is no_default:
1634-
try:
1635-
meta = self._meta_nonempty.map(arg, na_action=na_action)
1636-
except Exception:
1637-
raise ValueError("Metadata inference failed, please provide "
1638-
"`meta` keyword")
1627+
meta = _emulate(M.map, self, arg, na_action=na_action)
16391628
else:
16401629
meta = make_meta(meta)
16411630

@@ -1761,13 +1750,9 @@ def apply(self, func, convert_dtype=True, meta=no_default,
17611750
" or: .apply(func, meta=('x', 'f8')) for series result")
17621751
warnings.warn(msg)
17631752

1764-
try:
1765-
meta = _emulate(M.apply, self._meta_nonempty, func,
1766-
convert_dtype=convert_dtype,
1767-
args=args, **kwds)
1768-
except Exception:
1769-
raise ValueError("Metadata inference failed, please provide "
1770-
"`meta` keyword")
1753+
meta = _emulate(M.apply, self._meta_nonempty, func,
1754+
convert_dtype=convert_dtype,
1755+
args=args, **kwds)
17711756

17721757
return map_partitions(M.apply, self, func,
17731758
convert_dtype, args, meta=meta, **kwds)
@@ -2350,12 +2335,8 @@ def apply(self, func, axis=0, args=(), meta=no_default,
23502335
" or: .apply(func, meta=('x', 'f8')) for series result")
23512336
warnings.warn(msg)
23522337

2353-
try:
2354-
meta = _emulate(M.apply, self._meta_nonempty, func,
2355-
axis=axis, args=args, **kwds)
2356-
except Exception:
2357-
raise ValueError("Metadata inference failed, please provide "
2358-
"`meta` keyword")
2338+
meta = _emulate(M.apply, self._meta_nonempty, func,
2339+
axis=axis, args=args, **kwds)
23592340

23602341
return map_partitions(M.apply, self, func, axis,
23612342
False, False, None, args, meta=meta, **kwds)
@@ -2605,13 +2586,9 @@ def apply_concat_apply(args, chunk=None, aggregate=None, meta=no_default,
26052586
dsk2 = {(b, 0): (apply, aggregate, [conc], aggregate_kwargs)}
26062587

26072588
if meta is no_default:
2608-
try:
2609-
meta_chunk = _emulate(apply, chunk, args, chunk_kwargs)
2610-
meta = _emulate(apply, aggregate, [_concat([meta_chunk])],
2611-
aggregate_kwargs)
2612-
except Exception:
2613-
raise ValueError("Metadata inference failed, please provide "
2614-
"`meta` keyword")
2589+
meta_chunk = _emulate(apply, chunk, args, chunk_kwargs)
2590+
meta = _emulate(apply, aggregate, [_concat([meta_chunk])],
2591+
aggregate_kwargs)
26152592
meta = make_meta(meta)
26162593

26172594
dasks = [arg.dask for arg in args if isinstance(arg, _Frame)]
@@ -2645,7 +2622,8 @@ def _emulate(func, *args, **kwargs):
26452622
Apply a function using args / kwargs. If arguments contain dd.DataFrame /
26462623
dd.Series, using internal cache (``_meta``) for calculation
26472624
"""
2648-
return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
2625+
with raise_on_meta_error(funcname(func)):
2626+
return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
26492627

26502628

26512629
@insert_meta_param_description
@@ -2678,11 +2656,7 @@ def map_partitions(func, *args, **kwargs):
26782656
args = _maybe_align_partitions(args)
26792657

26802658
if meta is no_default:
2681-
try:
2682-
meta = _emulate(func, *args, **kwargs)
2683-
except Exception:
2684-
raise ValueError("Metadata inference failed, please provide "
2685-
"`meta` keyword")
2659+
meta = _emulate(func, *args, **kwargs)
26862660

26872661
if all(isinstance(arg, Scalar) for arg in args):
26882662
dask = {(name, 0):

dask/dataframe/groupby.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
from .core import DataFrame, Series, Index, aca, map_partitions, no_default
99
from .shuffle import shuffle
10-
from .utils import make_meta, insert_meta_param_description
11-
from ..utils import derived_from, M
10+
from .utils import make_meta, insert_meta_param_description, raise_on_meta_error
11+
from ..utils import derived_from, M, funcname
1212

1313

1414
def _maybe_slice(grouped, columns):
@@ -324,11 +324,8 @@ def apply(self, func, meta=no_default, columns=no_default):
324324
" or: .apply(func, meta=('x', 'f8')) for series result")
325325
warnings.warn(msg)
326326

327-
try:
327+
with raise_on_meta_error("groupby.apply({0})".format(funcname(func))):
328328
meta = self._meta_nonempty.apply(func)
329-
except:
330-
raise ValueError("Metadata inference failed, please provide "
331-
"`meta` keyword")
332329
else:
333330
meta = make_meta(meta)
334331

dask/dataframe/tests/test_utils_dataframe.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import numpy as np
22
import pandas as pd
33
import dask.dataframe as dd
4-
from dask.dataframe.utils import shard_df_on_index, meta_nonempty, make_meta
4+
from dask.dataframe.utils import (shard_df_on_index, meta_nonempty, make_meta,
5+
raise_on_meta_error)
56

67
import pytest
78

@@ -187,3 +188,19 @@ def test_meta_nonempty_scalar():
187188
x = pd.Timestamp(2000, 1, 1)
188189
meta = meta_nonempty(x)
189190
assert meta is x
191+
192+
193+
def test_raise_on_meta_error():
    """Errors raised inside ``raise_on_meta_error`` resurface as ``ValueError``.

    Covers both the anonymous form and the form that names the failing
    function, checking the message prefix and that the original error's type
    name is embedded in the message.
    """
    # ``pytest.raises`` fails the test when nothing is raised; a bare
    # try/except without ``else`` would let that regression pass silently.
    with pytest.raises(ValueError) as info:
        with raise_on_meta_error():
            raise RuntimeError("Bad stuff")

    assert info.value.args[0].startswith("Metadata inference failed.\n")
    assert 'RuntimeError' in info.value.args[0]

    with pytest.raises(ValueError) as info:
        with raise_on_meta_error("myfunc"):
            raise RuntimeError("Bad stuff")

    assert info.value.args[0].startswith("Metadata inference failed in `myfunc`.\n")
    assert 'RuntimeError' in info.value.args[0]

dask/dataframe/utils.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
from collections import Iterator
77
import sys
8+
import traceback
9+
from contextlib import contextmanager
810

911
import numpy as np
1012
import pandas as pd
@@ -130,6 +132,33 @@ def insert_meta_param_description(*args, **kwargs):
130132
return f
131133

132134

135+
@contextmanager
def raise_on_meta_error(funcname=None):
    """Reraise errors in this block to show metadata inference failure.

    Any ``Exception`` raised inside the ``with`` block is replaced by a
    ``ValueError`` whose message states that metadata inference failed and
    embeds the ``repr`` of the original error together with its formatted
    traceback.

    Parameters
    ----------
    funcname : str, optional
        If provided, will be added to the error message to indicate the
        name of the method that failed.

    Raises
    ------
    ValueError
        If the wrapped block raises any ``Exception``.
    """
    try:
        yield
    except Exception as e:
        # Only the traceback object is needed from ``sys.exc_info()``; the
        # exception itself is already bound to ``e``.
        tb = ''.join(traceback.format_tb(sys.exc_info()[2]))
        location = " in `{0}`".format(funcname) if funcname else ""
        msg = ("Metadata inference failed{0}.\n\n"
               "Original error is below:\n"
               "------------------------\n"
               "{1}\n\n"
               "Traceback:\n"
               "---------\n"
               "{2}"
               ).format(location, repr(e), tb)
        raise ValueError(msg)
160+
161+
133162
def make_meta(x, index=None):
134163
"""Create an empty pandas object containing the desired metadata.
135164

0 commit comments

Comments
 (0)