Skip to content

Commit 75daea4

Browse files
DEPR: be stricter in assert_almost_equal (#52081)
* DEPR: be stricter in assert_almost_equal * 32bit builds * Fix transform test * ignore warning I can't reproduce locally * pylint fixup * Fix ArrayManager and CoW builds * fix tests * Whatsnew --------- Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 2a270d8 commit 75daea4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+345
-106
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,10 @@ Deprecations
265265
- Deprecated logical operation between two non-boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
266266
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, ``"infer"``, or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
267267
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
268+
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering mismatched NA-like values (e.g. ``NaN`` vs ``None``) as equivalent (:issue:`52081`)
268269
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
269270
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
271+
-
270272

271273
.. ---------------------------------------------------------------------------
272274
.. _whatsnew_210.performance:

pandas/_libs/testing.pyx

+23-7
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,25 @@
11
import cmath
22
import math
3+
import warnings
34

45
import numpy as np
56

67
from numpy cimport import_array
78

89
import_array()
910

10-
from pandas._libs.missing cimport checknull
11+
from pandas._libs.missing cimport (
12+
checknull,
13+
is_matching_na,
14+
)
1115
from pandas._libs.util cimport (
1216
is_array,
1317
is_complex_object,
1418
is_real_number_object,
1519
)
1620

21+
from pandas.util._exceptions import find_stack_level
22+
1723
from pandas.core.dtypes.missing import array_equivalent
1824

1925

@@ -176,13 +182,23 @@ cpdef assert_almost_equal(a, b,
176182
# classes can't be the same, to raise error
177183
assert_class_equal(a, b, obj=obj)
178184

179-
if checknull(a) and checknull(b):
180-
# TODO: Should require same-dtype NA?
185+
if checknull(a):
181186
# nan / None comparison
182-
return True
183-
184-
if (checknull(a) and not checknull(b)) or (not checknull(a) and checknull(b)):
185-
# boolean value of pd.NA is ambiguous
187+
if is_matching_na(a, b, nan_matches_none=False):
188+
return True
189+
elif checknull(b):
190+
# GH#18463
191+
warnings.warn(
192+
f"Mismatched null-like values {a} and {b} found. In a future "
193+
"version, pandas equality-testing functions "
194+
"(e.g. assert_frame_equal) will consider these not-matching "
195+
"and raise.",
196+
FutureWarning,
197+
stacklevel=find_stack_level(),
198+
)
199+
return True
200+
raise AssertionError(f"{a} != {b}")
201+
elif checknull(b):
186202
raise AssertionError(f"{a} != {b}")
187203

188204
if a == b:

pandas/core/dtypes/missing.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
TYPE_CHECKING,
1010
overload,
1111
)
12+
import warnings
1213

1314
import numpy as np
1415

@@ -573,17 +574,20 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo
573574
if not isinstance(right_value, float) or not np.isnan(right_value):
574575
return False
575576
else:
576-
try:
577-
if np.any(np.asarray(left_value != right_value)):
577+
with warnings.catch_warnings():
578+
# suppress numpy's "elementwise comparison failed"
579+
warnings.simplefilter("ignore", DeprecationWarning)
580+
try:
581+
if np.any(np.asarray(left_value != right_value)):
582+
return False
583+
except TypeError as err:
584+
if "boolean value of NA is ambiguous" in str(err):
585+
return False
586+
raise
587+
except ValueError:
588+
# numpy can raise a ValueError if left and right cannot be
589+
# compared (e.g. nested arrays)
578590
return False
579-
except TypeError as err:
580-
if "boolean value of NA is ambiguous" in str(err):
581-
return False
582-
raise
583-
except ValueError:
584-
# numpy can raise a ValueError if left and right cannot be
585-
# compared (e.g. nested arrays)
586-
return False
587591
return True
588592

589593

pandas/tests/arithmetic/test_timedelta64.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -2037,6 +2037,10 @@ def test_td64arr_div_numeric_array(
20372037
if box_with_array is DataFrame:
20382038
expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))]
20392039
expected = tm.box_expected(expected, xbox).astype(object)
2040+
# We specifically expect timedelta64("NaT") here, not pd.NA
2041+
expected[2] = expected[2].fillna(
2042+
np.timedelta64("NaT", "ns"), downcast=False
2043+
)
20402044
else:
20412045
expected = [tdser[n] / vector[n] for n in range(len(tdser))]
20422046
expected = [
@@ -2113,9 +2117,12 @@ def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array):
21132117
left = tm.box_expected(tdi, box_with_array)
21142118
right = np.array([2, 2.0], dtype=object)
21152119

2116-
expected = Index([np.timedelta64("NaT", "ns")] * 2, dtype=object)
2120+
tdnat = np.timedelta64("NaT", "ns")
2121+
expected = Index([tdnat] * 2, dtype=object)
21172122
if box_with_array is not Index:
21182123
expected = tm.box_expected(expected, box_with_array).astype(object)
2124+
if box_with_array in [Series, DataFrame]:
2125+
expected = expected.fillna(tdnat, downcast=False) # GH#18463
21192126

21202127
result = left / right
21212128
tm.assert_equal(result, expected)

pandas/tests/arrays/integer/test_arithmetic.py

+4
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ def test_error_invalid_values(data, all_arithmetic_operators):
204204
]: # (data[~data.isna()] >= 0).all():
205205
res = ops(str_ser)
206206
expected = pd.Series(["foo" * x for x in data], index=s.index)
207+
expected = expected.fillna(np.nan)
208+
# TODO: doing this fillna to keep tests passing as we make
209+
# assert_almost_equal stricter, but the expected with pd.NA seems
210+
# more-correct than np.nan here.
207211
tm.assert_series_equal(res, expected)
208212
else:
209213
with pytest.raises(TypeError, match=msg):

pandas/tests/arrays/integer/test_construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def test_conversions(data_missing):
5151
# astype to object series
5252
df = pd.DataFrame({"A": data_missing})
5353
result = df["A"].astype("object")
54-
expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A")
54+
expected = pd.Series(np.array([pd.NA, 1], dtype=object), name="A")
5555
tm.assert_series_equal(result, expected)
5656

5757
# convert to object ndarray

pandas/tests/extension/test_boolean.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
import numpy as np
1717
import pytest
1818

19-
from pandas.core.dtypes.common import is_bool_dtype
19+
from pandas.compat import (
20+
IS64,
21+
is_platform_windows,
22+
)
2023

2124
import pandas as pd
2225
import pandas._testing as tm
@@ -382,11 +385,18 @@ class TestUnaryOps(base.BaseUnaryOpsTests):
382385

383386
class TestAccumulation(base.BaseAccumulateTests):
384387
def check_accumulate(self, s, op_name, skipna):
388+
length = 64
389+
if not IS64 or is_platform_windows():
390+
if not s.dtype.itemsize == 8:
391+
length = 32
392+
385393
result = getattr(s, op_name)(skipna=skipna)
386394
expected = getattr(pd.Series(s.astype("float64")), op_name)(skipna=skipna)
387-
tm.assert_series_equal(result, expected, check_dtype=False)
388-
if op_name in ("cummin", "cummax"):
389-
assert is_bool_dtype(result)
395+
if op_name not in ("cummin", "cummax"):
396+
expected = expected.astype(f"Int{length}")
397+
else:
398+
expected = expected.astype("boolean")
399+
tm.assert_series_equal(result, expected)
390400

391401
@pytest.mark.parametrize("skipna", [True, False])
392402
def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna):

pandas/tests/frame/methods/test_compare.py

+4
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ def test_compare_ea_and_np_dtype(val1, val2):
265265
("b", "other"): np.nan,
266266
}
267267
)
268+
if val1 is pd.NA and val2 is pd.NA:
269+
# GH#18463 TODO: is this really the desired behavior?
270+
expected.loc[1, ("a", "self")] = np.nan
271+
268272
if val1 is pd.NA and is_numpy_dev:
269273
# can't compare with numpy array if it contains pd.NA
270274
with pytest.raises(TypeError, match="boolean value of NA is ambiguous"):

pandas/tests/frame/methods/test_quantile.py

+3
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,9 @@ def test_quantile_empty_no_rows_dt64(self, interp_method):
734734
0.5, numeric_only=False, interpolation=interpolation, method=method
735735
)
736736
exp = exp.astype(object)
737+
if interpolation == "nearest":
738+
# GH#18463 TODO: would we prefer NaTs here?
739+
exp = exp.fillna(np.nan, downcast=False)
737740
tm.assert_series_equal(res, exp)
738741

739742
# both dt64tz

pandas/tests/frame/methods/test_reindex.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,13 @@ def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour):
112112
.set_index("index")
113113
.reindex(["1", "2"])
114114
)
115+
exp = DataFrame({"index": ["1", "2"], "vals": [np.nan, np.nan]}).set_index(
116+
"index"
117+
)
118+
exp = exp.astype(object)
115119
tm.assert_frame_equal(
116120
df,
117-
DataFrame({"index": ["1", "2"], "vals": [None, None]}).set_index("index"),
121+
exp,
118122
)
119123

120124

@@ -1191,7 +1195,7 @@ def test_reindex_empty_frame(self, kwargs):
11911195
idx = date_range(start="2020", freq="30s", periods=3)
11921196
df = DataFrame([], index=Index([], name="time"), columns=["a"])
11931197
result = df.reindex(idx, **kwargs)
1194-
expected = DataFrame({"a": [pd.NA] * 3}, index=idx)
1198+
expected = DataFrame({"a": [np.nan] * 3}, index=idx, dtype=object)
11951199
tm.assert_frame_equal(result, expected)
11961200

11971201
@pytest.mark.parametrize(

pandas/tests/frame/test_arithmetic.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1245,19 +1245,19 @@ def test_operators_none_as_na(self, op):
12451245
filled = df.fillna(np.nan)
12461246
result = op(df, 3)
12471247
expected = op(filled, 3).astype(object)
1248-
expected[pd.isna(expected)] = None
1248+
expected[pd.isna(expected)] = np.nan
12491249
tm.assert_frame_equal(result, expected)
12501250

12511251
result = op(df, df)
12521252
expected = op(filled, filled).astype(object)
1253-
expected[pd.isna(expected)] = None
1253+
expected[pd.isna(expected)] = np.nan
12541254
tm.assert_frame_equal(result, expected)
12551255

12561256
result = op(df, df.fillna(7))
12571257
tm.assert_frame_equal(result, expected)
12581258

12591259
result = op(df.fillna(7), df)
1260-
tm.assert_frame_equal(result, expected, check_dtype=False)
1260+
tm.assert_frame_equal(result, expected)
12611261

12621262
@pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])
12631263
# TODO: not sure what's correct here.

pandas/tests/frame/test_reductions.py

+3
Original file line numberDiff line numberDiff line change
@@ -331,11 +331,14 @@ def wrapper(x):
331331
DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
332332
],
333333
)
334+
@pytest.mark.filterwarnings("ignore:Mismatched null-like values:FutureWarning")
334335
def test_stat_operators_attempt_obj_array(self, method, df, axis):
335336
# GH#676
336337
assert df.values.dtype == np.object_
337338
result = getattr(df, method)(axis=axis)
338339
expected = getattr(df.astype("f8"), method)(axis=axis).astype(object)
340+
if axis in [1, "columns"] and method in ["min", "max"]:
341+
expected[expected.isna()] = None
339342
tm.assert_series_equal(result, expected)
340343

341344
@pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"])

pandas/tests/frame/test_stack_unstack.py

+4
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,10 @@ def test_unstack_mixed_extension_types(self, level):
11801180

11811181
result = df.unstack(level=level)
11821182
expected = df.astype(object).unstack(level=level)
1183+
if level == 0:
1184+
expected[("A", "B")] = expected[("A", "B")].fillna(pd.NA)
1185+
else:
1186+
expected[("A", 0)] = expected[("A", 0)].fillna(pd.NA)
11831187

11841188
expected_dtypes = Series(
11851189
[df.A.dtype] * 2 + [df.B.dtype] * 2, index=result.columns

pandas/tests/groupby/test_groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2994,7 +2994,7 @@ def test_groupby_sum_on_nan_should_return_nan(bug_var):
29942994
dfgb = df.groupby(lambda x: x)
29952995
result = dfgb.sum(min_count=1)
29962996

2997-
expected_df = DataFrame([bug_var, bug_var, bug_var, np.nan], columns=["A"])
2997+
expected_df = DataFrame([bug_var, bug_var, bug_var, None], columns=["A"])
29982998
tm.assert_frame_equal(result, expected_df)
29992999

30003000

pandas/tests/groupby/test_nth.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def test_first_last_nth_dtypes(df_mixed_floats):
154154

155155
def test_first_last_nth_nan_dtype():
156156
# GH 33591
157-
df = DataFrame({"data": ["A"], "nans": Series([np.nan], dtype=object)})
157+
df = DataFrame({"data": ["A"], "nans": Series([None], dtype=object)})
158158
grouped = df.groupby("data")
159159

160160
expected = df.set_index("data").nans

pandas/tests/groupby/transform/test_transform.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,11 @@ def test_cython_transform_frame(op, args, targop):
746746
expected = gb.apply(targop)
747747

748748
expected = expected.sort_index(axis=1)
749+
if op == "shift":
750+
expected["string_missing"] = expected["string_missing"].fillna(
751+
np.nan, downcast=False
752+
)
753+
expected["string"] = expected["string"].fillna(np.nan, downcast=False)
749754

750755
result = gb[expected.columns].transform(op, *args).sort_index(axis=1)
751756
tm.assert_frame_equal(result, expected)
@@ -772,8 +777,13 @@ def test_cython_transform_frame(op, args, targop):
772777
else:
773778
expected = gb[c].apply(targop)
774779
expected.name = c
775-
tm.assert_series_equal(expected, gb[c].transform(op, *args))
776-
tm.assert_series_equal(expected, getattr(gb[c], op)(*args))
780+
if c in ["string_missing", "string"]:
781+
expected = expected.fillna(np.nan, downcast=False)
782+
783+
res = gb[c].transform(op, *args)
784+
tm.assert_series_equal(expected, res)
785+
res2 = getattr(gb[c], op)(*args)
786+
tm.assert_series_equal(expected, res2)
777787

778788

779789
def test_transform_with_non_scalar_group():

pandas/tests/indexing/test_loc.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1163,7 +1163,9 @@ def test_loc_setitem_empty_append_expands_rows(self):
11631163
# GH6173, various appends to an empty dataframe
11641164

11651165
data = [1, 2, 3]
1166-
expected = DataFrame({"x": data, "y": [None] * len(data)})
1166+
expected = DataFrame(
1167+
{"x": data, "y": np.array([np.nan] * len(data), dtype=object)}
1168+
)
11671169

11681170
# appends to fit length of data
11691171
df = DataFrame(columns=["x", "y"])
@@ -1174,7 +1176,9 @@ def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self):
11741176
# GH#37932 same as test_loc_setitem_empty_append_expands_rows
11751177
# but with mixed dtype so we go through take_split_path
11761178
data = [1, 2, 3]
1177-
expected = DataFrame({"x": data, "y": [None] * len(data)})
1179+
expected = DataFrame(
1180+
{"x": data, "y": np.array([np.nan] * len(data), dtype=object)}
1181+
)
11781182

11791183
df = DataFrame(columns=["x", "y"])
11801184
df["x"] = df["x"].astype(np.int64)

pandas/tests/io/excel/test_readers.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1149,13 +1149,14 @@ def test_excel_old_index_format(self, read_ext):
11491149
# now be interpreted as rows that include null data.
11501150
data = np.array(
11511151
[
1152-
[None, None, None, None, None],
1152+
[np.nan, np.nan, np.nan, np.nan, np.nan],
11531153
["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"],
11541154
["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"],
11551155
["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"],
11561156
["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"],
11571157
["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"],
1158-
]
1158+
],
1159+
dtype=object,
11591160
)
11601161
columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"]
11611162
mi = MultiIndex(

pandas/tests/io/json/test_pandas.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -928,13 +928,15 @@ def test_doc_example(self):
928928
result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
929929
tm.assert_frame_equal(result, result)
930930

931-
def test_round_trip_exception_(self, datapath):
931+
def test_round_trip_exception(self, datapath):
932932
# GH 3867
933933
path = datapath("io", "json", "data", "teams.csv")
934934
df = pd.read_csv(path)
935935
s = df.to_json()
936936
result = read_json(s)
937-
tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df)
937+
res = result.reindex(index=df.index, columns=df.columns)
938+
res = res.fillna(np.nan, downcast=False)
939+
tm.assert_frame_equal(res, df)
938940

939941
@pytest.mark.network
940942
@tm.network(
@@ -1747,7 +1749,7 @@ def test_emca_262_nan_inf_support(self):
17471749
data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
17481750
result = read_json(data)
17491751
expected = DataFrame(
1750-
["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
1752+
["a", None, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
17511753
)
17521754
tm.assert_frame_equal(result, expected)
17531755

0 commit comments

Comments
 (0)