Skip to content

Commit 3b8524e

Browse files
committed
TST/CLN: Catch more warnings
Catches a handful of warnings from the dask.dataframe test suite. Remaining warnings fall into a few classes: 1. ResourceWarnings from cloudpickle (e.g. dask/dataframe/tests/test_arithmetics_reduction.py::test_frame_series_arithmetic_methods). Not sure what to do here. Probably a legitimate concern of dask's? 2. RuntimeWarnings from numpy (e.g. dask/dataframe/tests/test_arithmetics_reduction.py::test_reductions[False]). I think dask should catch these (followup PR). 3. ImportWarning from dependencies importing C code? e.g. dask/dataframe/tests/test_categorical.py::test_categorical_set_index[disk]. I have a fix for partd (msgpack). Pandas has some too. Still investigating, may be a Cython issue. 4. DeprecationWarning on regexes, e.g. dask/dataframe/tests/test_groupby.py::test_full_groupby_multilevel[grouper4]. I think these are all pandas. xref pandas-dev/pandas#16481 (comment). 5. RuntimeWarning from pandas merge, e.g. dask/dataframe/tests/test_multi.py::test_merge_by_multiple_columns[disk-inner]. Filing an issue on pandas.
1 parent 522c384 commit 3b8524e

10 files changed

+243
-167
lines changed

dask/dataframe/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,7 @@ def apply(self, func, meta=no_default):
911911
" Before: .apply(func)\n"
912912
" After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n"
913913
" or: .apply(func, meta=('x', 'f8')) for series result")
914-
warnings.warn(msg)
914+
warnings.warn(msg, stacklevel=2)
915915

916916
with raise_on_meta_error("groupby.apply({0})".format(funcname(func))):
917917
meta = self._meta_nonempty.apply(func)

dask/dataframe/io/tests/test_csv.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -436,11 +436,12 @@ def test_warn_non_seekable_files():
436436
assert 'blocksize=None' in msg
437437

438438
with pytest.warns(None) as w:
439-
df = dd.read_csv('2014-01-*.csv', compression='gzip', blocksize=None)
439+
df = dd.read_csv('2014-01-*.csv', compression='gzip',
440+
blocksize=None)
440441
assert len(w) == 0
441442

442443
with pytest.raises(NotImplementedError):
443-
with pytest.warns(None):
444+
with pytest.warns(UserWarning): # needed for pytest
444445
df = dd.read_csv('2014-01-*.csv', compression='foo')
445446

446447

@@ -730,8 +731,8 @@ def test_read_csv_sep():
730731
charlie###300""")
731732

732733
with filetext(sep_text) as fn:
733-
ddf = dd.read_csv(fn, sep="###")
734-
df = pd.read_csv(fn, sep="###")
734+
ddf = dd.read_csv(fn, sep="###", engine="python")
735+
df = pd.read_csv(fn, sep="###", engine="python")
735736

736737
assert (df.columns == ddf.columns).all()
737738
assert len(df) == len(ddf)

dask/dataframe/io/tests/test_io.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -310,12 +310,14 @@ def test_from_pandas_single_row():
310310
assert_eq(ddf, df)
311311

312312

313+
@pytest.mark.skipif(np.__version__ < '1.11',
314+
reason='datetime unit unsupported in NumPy < 1.11')
313315
def test_from_pandas_with_datetime_index():
314316
df = pd.DataFrame({"Date": ["2015-08-28", "2015-08-27", "2015-08-26",
315317
"2015-08-25", "2015-08-24", "2015-08-21",
316318
"2015-08-20", "2015-08-19", "2015-08-18"],
317319
"Val": list(range(9))})
318-
df.Date = df.Date.astype('datetime64')
320+
df.Date = df.Date.astype('datetime64[ns]')
319321
ddf = dd.from_pandas(df, 2)
320322
assert_eq(df, ddf)
321323
ddf = dd.from_pandas(df, chunksize=2)

dask/dataframe/methods.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import print_function, absolute_import, division
22

3+
import warnings
4+
35
import numpy as np
46
import pandas as pd
57
from pandas.api.types import is_categorical_dtype
@@ -253,8 +255,11 @@ def concat(dfs, axis=0, join='outer', uniform=False):
253255
# concatenates.
254256
dfs3 = [df if isinstance(df, pd.DataFrame) else
255257
df.to_frame().rename(columns={df.name: 0}) for df in dfs2]
256-
cat_mask = pd.concat([(df.dtypes == 'category').to_frame().T
257-
for df in dfs3], join=join).any()
258+
# pandas may raise a RuntimeWarning for comparing ints and strs
259+
with warnings.catch_warnings():
260+
warnings.simplefilter("ignore", RuntimeWarning)
261+
cat_mask = pd.concat([(df.dtypes == 'category').to_frame().T
262+
for df in dfs3], join=join).any()
258263

259264
if cat_mask.any():
260265
not_cat = cat_mask[~cat_mask].index
@@ -280,7 +285,10 @@ def concat(dfs, axis=0, join='outer', uniform=False):
280285
out[col] = union_categoricals(parts)
281286
out = out.reindex_axis(cat_mask.index, axis=1)
282287
else:
283-
out = pd.concat(dfs3, join=join)
288+
# pandas may raise a RuntimeWarning for comparing ints and strs
289+
with warnings.catch_warnings():
290+
warnings.simplefilter("ignore", RuntimeWarning)
291+
out = pd.concat(dfs3, join=join)
284292
else:
285293
if is_categorical_dtype(dfs2[0].dtype):
286294
if ind is None:

dask/dataframe/tests/test_arithmetics_reduction.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -635,9 +635,15 @@ def test_reductions(split_every):
635635
assert_eq(dds.min(split_every=split_every), pds.min())
636636
assert_eq(dds.max(split_every=split_every), pds.max())
637637
assert_eq(dds.count(split_every=split_every), pds.count())
638-
assert_eq(dds.std(split_every=split_every), pds.std())
639-
assert_eq(dds.var(split_every=split_every), pds.var())
640-
assert_eq(dds.sem(split_every=split_every), pds.sem())
638+
with pytest.warns(None):
639+
# runtime warnings; https://github.com/dask/dask/issues/2381
640+
assert_eq(dds.std(split_every=split_every), pds.std())
641+
with pytest.warns(None):
642+
# runtime warnings; https://github.com/dask/dask/issues/2381
643+
assert_eq(dds.var(split_every=split_every), pds.var())
644+
with pytest.warns(None):
645+
# runtime warnings; https://github.com/dask/dask/issues/2381
646+
assert_eq(dds.sem(split_every=split_every), pds.sem())
641647
assert_eq(dds.std(ddof=0, split_every=split_every), pds.std(ddof=0))
642648
assert_eq(dds.var(ddof=0, split_every=split_every), pds.var(ddof=0))
643649
assert_eq(dds.sem(ddof=0, split_every=split_every), pds.sem(ddof=0))

dask/dataframe/tests/test_dataframe.py

+36-13
Original file line numberDiff line numberDiff line change
@@ -1381,9 +1381,10 @@ def test_eval():
13811381
with pytest.raises(NotImplementedError):
13821382
d.eval('z = x + y', inplace=True)
13831383

1384-
if p.eval('z = x + y', inplace=None) is None:
1385-
with pytest.raises(NotImplementedError):
1386-
d.eval('z = x + y', inplace=None)
1384+
with pytest.warns(None):
1385+
if p.eval('z = x + y', inplace=None) is None:
1386+
with pytest.raises(NotImplementedError):
1387+
d.eval('z = x + y', inplace=None)
13871388

13881389

13891390
@pytest.mark.parametrize('include, exclude', [
@@ -1702,7 +1703,7 @@ def test_apply():
17021703
ddf = dd.from_pandas(df, npartitions=2)
17031704

17041705
func = lambda row: row['x'] + row['y']
1705-
assert_eq(ddf.x.apply(lambda x: x + 1),
1706+
assert_eq(ddf.x.apply(lambda x: x + 1, meta=("x", int)),
17061707
df.x.apply(lambda x: x + 1))
17071708

17081709
# specify meta
@@ -1712,16 +1713,19 @@ def test_apply():
17121713
df.apply(lambda xy: xy[0] + xy[1], axis='columns'))
17131714

17141715
# inference
1715-
assert_eq(ddf.apply(lambda xy: xy[0] + xy[1], axis=1),
1716-
df.apply(lambda xy: xy[0] + xy[1], axis=1))
1717-
assert_eq(ddf.apply(lambda xy: xy, axis=1),
1718-
df.apply(lambda xy: xy, axis=1))
1716+
with pytest.warns(None):
1717+
assert_eq(ddf.apply(lambda xy: xy[0] + xy[1], axis=1),
1718+
df.apply(lambda xy: xy[0] + xy[1], axis=1))
1719+
with pytest.warns(None):
1720+
assert_eq(ddf.apply(lambda xy: xy, axis=1),
1721+
df.apply(lambda xy: xy, axis=1))
17191722

17201723
# specify meta
17211724
func = lambda x: pd.Series([x, x])
17221725
assert_eq(ddf.x.apply(func, meta=[(0, int), (1, int)]), df.x.apply(func))
17231726
# inference
1724-
assert_eq(ddf.x.apply(func), df.x.apply(func))
1727+
with pytest.warns(None):
1728+
assert_eq(ddf.x.apply(func), df.x.apply(func))
17251729

17261730
# axis=0
17271731
with pytest.raises(NotImplementedError):
@@ -1731,6 +1735,21 @@ def test_apply():
17311735
ddf.apply(lambda xy: xy, axis='index')
17321736

17331737

1738+
def test_apply_warns():
1739+
df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]})
1740+
ddf = dd.from_pandas(df, npartitions=2)
1741+
1742+
func = lambda row: row['x'] + row['y']
1743+
1744+
with pytest.warns(UserWarning) as w:
1745+
ddf.apply(func, axis=1)
1746+
assert len(w) == 1
1747+
1748+
with pytest.warns(None) as w:
1749+
ddf.apply(func, axis=1, meta=(None, int))
1750+
assert len(w) == 0
1751+
1752+
17341753
def test_applymap():
17351754
df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]})
17361755
ddf = dd.from_pandas(df, npartitions=2)
@@ -1886,13 +1905,15 @@ def return_df(x):
18861905
return pd.Series([x.sum(), x.mean()], index=['sum', 'mean'])
18871906

18881907
# DataFrame to completely different DataFrame
1889-
result = ddf.apply(return_df, axis=1)
1908+
with pytest.warns(None):
1909+
result = ddf.apply(return_df, axis=1)
18901910
assert isinstance(result, dd.DataFrame)
18911911
tm.assert_index_equal(result.columns, pd.Index(['sum', 'mean']))
18921912
assert_eq(result, df.apply(return_df, axis=1))
18931913

18941914
# DataFrame to Series
1895-
result = ddf.apply(lambda x: 1, axis=1)
1915+
with pytest.warns(None):
1916+
result = ddf.apply(lambda x: 1, axis=1)
18961917
assert isinstance(result, dd.Series)
18971918
assert result.name is None
18981919
assert_eq(result, df.apply(lambda x: 1, axis=1))
@@ -1901,13 +1922,15 @@ def return_df2(x):
19011922
return pd.Series([x * 2, x * 3], index=['x2', 'x3'])
19021923

19031924
# Series to completely different DataFrame
1904-
result = ddf.x.apply(return_df2)
1925+
with pytest.warns(None):
1926+
result = ddf.x.apply(return_df2)
19051927
assert isinstance(result, dd.DataFrame)
19061928
tm.assert_index_equal(result.columns, pd.Index(['x2', 'x3']))
19071929
assert_eq(result, df.x.apply(return_df2))
19081930

19091931
# Series to Series
1910-
result = ddf.x.apply(lambda x: 1)
1932+
with pytest.warns(None):
1933+
result = ddf.x.apply(lambda x: 1)
19111934
assert isinstance(result, dd.Series)
19121935
assert result.name == 'x'
19131936
assert_eq(result, df.x.apply(lambda x: 1))

0 commit comments

Comments
 (0)