Skip to content

Commit 4ed6290

Browse files
committed
use pd.isna in array.utils.assert_eq
1 parent 0c3ebf6 commit 4ed6290

File tree

2 files changed

+20
-24
lines changed

2 files changed

+20
-24
lines changed

dask/array/utils.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import warnings
88

99
import numpy as np
10+
import pandas as pd
1011
from tlz import frequencies, concat
1112

1213
from .core import Array
@@ -110,10 +111,7 @@ def meta_from_array(x, ndim=None, dtype=None):
110111
if (
111112
any(
112113
s in str(e)
113-
for s in [
114-
"invalid literal",
115-
"could not convert string to float",
116-
]
114+
for s in ["invalid literal", "could not convert string to float",]
117115
)
118116
and meta.dtype.kind in "SU"
119117
):
@@ -181,7 +179,10 @@ def allclose(a, b, equal_nan=False, **kwargs):
181179
return np.allclose(a, b, equal_nan=equal_nan, **kwargs)
182180
if equal_nan:
183181
return a.shape == b.shape and all(
184-
np.isnan(b) if np.isnan(a) else a == b for (a, b) in zip(a.flat, b.flat)
182+
# NOTE: use `pd.isna` instead of `np.isnan` to also handle
183+
# pandas NA values which could slip in
184+
pd.isna([a, b]).all() if pd.isna([a, b]).any() else a == b
185+
for (a, b) in zip(a.flat, b.flat)
185186
)
186187
return (a == b).all()
187188

dask/dataframe/tests/test_dataframe.py

Lines changed: 14 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -3768,16 +3768,9 @@ def test_values_extension_array():
37683768
)
37693769
ddf = dd.from_pandas(df, 2)
37703770

3771-
# HACK: compare the arrays as strings, since `assert_eq` and its NumPy relatives
3772-
# don't know how to handle pandas NA values. Because the dtype ends up being `object`,
3773-
# it's just doing elementwise comparison, and NA != NA, but also NA is not NaN.
3774-
assert str(df.to_numpy()) == str(ddf.values.compute()), "Stringified values are not equal"
3775-
3776-
assert_eq(df.index.values, ddf.index.values)
3771+
assert_eq(df.to_numpy(), ddf.values, equal_nan=True)
3772+
assert_eq(df.index.to_numpy(), ddf.index.values)
37773773
for column in df.columns:
3778-
# FIXME fails on the `null_bool` column with `TypeError: boolean value of NA is ambiguous`
3779-
# from `pandas/_libs/missing.pyx:360` (`__bool__` method on `NAType`). Again, NumPy is doing
3780-
# elementwise == here, which doesn't work the way we want it to on NAs.
37813774
assert_eq(df[column].to_numpy(), ddf[column].values, equal_nan=True)
37823775

37833776

@@ -4093,7 +4086,10 @@ def test_cumulative_multiple_columns():
40934086

40944087

40954088
@pytest.mark.parametrize("func", [np.asarray, M.to_records])
4096-
def test_map_partition_array(func):
4089+
@pytest.mark.parametrize(
4090+
"pre", [lambda a: a, lambda a: a.x, lambda a: a.y, lambda a: a.index]
4091+
)
4092+
def test_map_partition_array(func, pre):
40974093
from dask.array.utils import assert_eq
40984094

40994095
df = pd.DataFrame(
@@ -4102,17 +4098,16 @@ def test_map_partition_array(func):
41024098
)
41034099
ddf = dd.from_pandas(df, npartitions=2)
41044100

4105-
for pre in [lambda a: a, lambda a: a.x, lambda a: a.y, lambda a: a.index]:
4101+
try:
4102+
expected = func(pre(df))
4103+
except Exception:
4104+
return
41064105

4107-
try:
4108-
expected = func(pre(df))
4109-
except Exception:
4110-
continue
4111-
x = pre(ddf).map_partitions(func)
4112-
assert_eq(x, expected)
4106+
x = pre(ddf).map_partitions(func)
4107+
assert_eq(x, expected)
41134108

4114-
assert isinstance(x, da.Array)
4115-
assert x.chunks[0] == (np.nan, np.nan)
4109+
assert isinstance(x, da.Array)
4110+
assert x.chunks[0] == (np.nan, np.nan)
41164111

41174112

41184113
def test_map_partition_sparse():

0 commit comments

Comments
 (0)